From dc11cc89f79a33eac33edafb23e9010734e70a09 Mon Sep 17 00:00:00 2001 From: yenicelik Date: Tue, 17 Jan 2023 16:04:23 +0100 Subject: [PATCH 01/47] first commit adding admin endpoint to modify users tier. TODO: clear cache --- rust-toolchain.toml | 2 + web3_proxy/src/frontend/mod.rs | 1 + web3_proxy/src/frontend/users.rs | 142 ++++++++++++++++++++++++++++++- 3 files changed, 141 insertions(+), 4 deletions(-) create mode 100644 rust-toolchain.toml diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 00000000..aaddc2c7 --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "1.65.0" \ No newline at end of file diff --git a/web3_proxy/src/frontend/mod.rs b/web3_proxy/src/frontend/mod.rs index 4d94367c..b26e963d 100644 --- a/web3_proxy/src/frontend/mod.rs +++ b/web3_proxy/src/frontend/mod.rs @@ -85,6 +85,7 @@ pub async fn serve(port: u16, proxy_app: Arc) -> anyhow::Result<() get(users::user_stats_aggregated_get), ) .route("/user/stats/detailed", get(users::user_stats_detailed_get)) + .route("/user/modify_role", get(users::admin_change_user_roles)) .route("/user/logout", post(users::user_logout_post)) .route("/status", get(status::status)) // layers are ordered bottom up diff --git a/web3_proxy/src/frontend/users.rs b/web3_proxy/src/frontend/users.rs index 44d066ba..bd57e252 100644 --- a/web3_proxy/src/frontend/users.rs +++ b/web3_proxy/src/frontend/users.rs @@ -3,10 +3,11 @@ use super::authorization::{login_is_authorized, RpcSecretKey}; use super::errors::FrontendResult; use crate::app::Web3ProxyApp; -use crate::user_queries::get_page_from_params; +use crate::user_queries::{get_page_from_params, get_user_id_from_params}; use crate::user_queries::{ get_chain_id_from_params, get_query_start_from_params, query_user_stats, StatResponse, }; +use entities::prelude::{User, SecondaryUser}; use crate::user_token::UserBearerToken; use anyhow::Context; use axum::headers::{Header, Origin, Referer, UserAgent}; @@ -19,14 +20,14 @@ use axum::{ use axum_client_ip::ClientIp; use axum_macros::debug_handler; use chrono::{TimeZone, Utc}; -use entities::sea_orm_active_enums::LogLevel; -use entities::{login, pending_login, revert_log, rpc_key, user}; +use entities::sea_orm_active_enums::{LogLevel, Role}; +use entities::{login, pending_login, revert_log, rpc_key, secondary_user, user, user_tier}; use ethers::{prelude::Address, types::Bytes}; use hashbrown::HashMap; use http::{HeaderValue, StatusCode}; use ipnet::IpNet; use itertools::Itertools; -use log::{debug, warn}; +use log::{debug, info, warn}; use migration::sea_orm::prelude::Uuid; use migration::sea_orm::{ self, ActiveModelTrait, ColumnTrait, EntityTrait, IntoActiveModel, PaginatorTrait, QueryFilter, @@ -40,6 +41,7 @@ use std::str::FromStr; use std::sync::Arc; use time::{Duration, OffsetDateTime}; use ulid::Ulid; +use crate::frontend::errors::FrontendErrorResponse; /// `GET /user/login/:user_address` or `GET /user/login/:user_address/:message_eip` -- Start the "Sign In with Ethereum" (siwe) login flow. /// @@ -853,3 +855,135 @@ pub async fn user_stats_detailed_get( Ok(response) } + +/// `GET /user/stats/detailed` -- Use a bearer token to get the user's key stats such as bandwidth used and methods requested. +/// +/// If no bearer is provided, detailed stats for all users will be shown. +/// View a single user with `?user_id=$x`. +/// View a single chain with `?chain_id=$x`. +/// +/// Set `$x` to zero to see all. +/// +/// TODO: this will change as we add better support for secondary users. 
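For context, here is a minimal client-side sketch of exercising the route registered above (GET /user/modify_role). The query parameter names (user_address, user_tier_title) and the bearer-token requirement are taken from the handler that follows; the host, token, address, and tier title are placeholders, and reqwest/tokio/anyhow are assumed only for the sake of the example.

use reqwest::Client;

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Placeholder values: substitute a real proxy host, an admin bearer token,
    // the target wallet address, and an existing user_tier title.
    let resp = Client::new()
        .get("https://proxy.example.com/user/modify_role")
        .bearer_auth("<admin bearer token>")
        .query(&[
            ("user_address", "0x0000000000000000000000000000000000000000"),
            ("user_tier_title", "premium"),
        ])
        .send()
        .await?
        .error_for_status()?;

    println!("{}", resp.text().await?);

    Ok(())
}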
+#[debug_handler]
+pub async fn admin_change_user_roles(
+    Extension(app): Extension<Arc<Web3ProxyApp>>,
+    bearer: Option<TypedHeader<Authorization<Bearer>>>,
+    Query(params): Query<HashMap<String, String>>,
+) -> FrontendResult {
+
+    // Make sure that the bearer exists, and has admin rights ...
+    let user_address: Vec<u8> = params
+        .get("user_address")
+        .ok_or_else(||
+            FrontendErrorResponse::StatusCode(
+                StatusCode::BAD_REQUEST,
+                "Unable to find user_address key in request".to_string(),
+                None,
+            )
+        )?
+        .parse::<Address>
() + .map_err(|err| { + FrontendErrorResponse::StatusCode( + StatusCode::BAD_REQUEST, + "Unable to parse user_address as an Address".to_string(), + Some(err.into()), + ) + })? + .to_fixed_bytes().into(); + let user_tier_title = params + .get("user_tier_title") + .ok_or_else(|| FrontendErrorResponse::StatusCode( + StatusCode::BAD_REQUEST, + "Unable to get the user_tier_title key from the request".to_string(), + None, + ))?; + + // Create database connections and all that + let db_conn = app.db_conn().context("admin_change_user_roles needs a db")?; + let db_replica = app + .db_replica() + .context("admin_change_user_roles needs a db replica")?; + let mut redis_conn = app + .redis_conn() + .await + .context("admin_change_user_roles had a redis connection error")? + .context("admin_change_user_roles needs a redis")?; + + // TODO: Make a single query, where you retrieve the user, and directly from it the secondary user (otherwise we do two jumpy, which is unnecessary) + // get the user id first. if it is 0, we should use a cache on the app + let user_id = get_user_id_from_params(&mut redis_conn, &db_conn, &db_replica, bearer, ¶ms).await?; + + let mut response_body = HashMap::new(); + response_body.insert( + "user_id", + serde_json::Value::Number(user_id.into()), + ); + + // Get both the user and user role + let user: user::Model = user::Entity::find_by_id(user_id) + .one(&db_conn) + .await? + .context("No user with this id found!")?; + // TODO: Let's connect the string, and find the previous string of the user id ... (this might be ok for now too thought) + response_body.insert( + "previous_tier", + serde_json::Value::Number(user.user_tier_id.into()), + ); + + // Modify the user-role ... + // Check if this use has admin privileges ... + let user_role: secondary_user::Model = secondary_user::Entity::find() + .filter(secondary_user::Column::UserId.eq(user_id)) + .one(&db_conn) + .await? + .context("No user tier found with that name")?; + println!("User role is: {:?}", user_role); + + // Return error if the user is not an admin or a user + match user_role.role { + Role::Owner | Role::Admin => { + // Change the user tier, we can copy a bunch of the functionality from the user-tier address + + // Check if all the required parameters are included in the request, if not, return an error + let user = user::Entity::find() + .filter(user::Column::Address.eq(user_address)) + .one(&db_conn) + .await? + .context("No user found with that key")?; + + // TODO: don't serialize the rpc key + debug!("user: {:#?}", user); + + let user_tier = user_tier::Entity::find() + .filter(user_tier::Column::Title.eq(user_tier_title.clone())) + .one(&db_conn) + .await? 
+ .context("No user tier found with that name")?; + debug!("user_tier: {:#?}", user_tier); + + if user.user_tier_id == user_tier.id { + info!("user already has that tier"); + } else { + let mut user = user.into_active_model(); + + user.user_tier_id = sea_orm::Set(user_tier.id); + + user.save(&db_conn).await?; + + info!("user's tier changed"); + } + + } + Role::Collaborator => { + return Err(anyhow::anyhow!("you do not have admin rights!").into()); + } + }; + + response_body.insert( + "user_id", + serde_json::Value::Number(user_id.into()), + ); + let mut response = Json(&response_body).into_response(); + Ok(response) +} From 62f89a127d8eb5273dd950adb0210bc095cae1d3 Mon Sep 17 00:00:00 2001 From: yenicelik Date: Tue, 17 Jan 2023 19:55:54 +0100 Subject: [PATCH 02/47] will stash wrong entities --- entities/src/admin.rs | 33 +++++ entities/src/login.rs | 6 +- entities/src/mod.rs | 4 +- migration/src/lib.rs | 2 + migration/src/m20230117_191358_admin_table.rs | 58 ++++++++ web3_proxy/src/admin_queries.rs | 124 ++++++++++++++++++ web3_proxy/src/frontend/mod.rs | 2 +- web3_proxy/src/frontend/users.rs | 117 +---------------- web3_proxy/src/lib.rs | 1 + web3_proxy/src/user_queries.rs | 1 + 10 files changed, 227 insertions(+), 121 deletions(-) create mode 100644 entities/src/admin.rs create mode 100644 migration/src/m20230117_191358_admin_table.rs create mode 100644 web3_proxy/src/admin_queries.rs diff --git a/entities/src/admin.rs b/entities/src/admin.rs new file mode 100644 index 00000000..d1d46999 --- /dev/null +++ b/entities/src/admin.rs @@ -0,0 +1,33 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.6 + +use sea_orm::entity::prelude::*; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] +#[sea_orm(table_name = "admin")] +pub struct Model { + #[sea_orm(primary_key)] + pub id: u64, + #[sea_orm(unique)] + pub user_id: u64, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::user::Entity", + from = "Column::UserId", + to = "super::user::Column::Id", + on_update = "NoAction", + on_delete = "NoAction" + )] + User, +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::User.def() + } +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/entities/src/login.rs b/entities/src/login.rs index e2600e84..82a78e42 100644 --- a/entities/src/login.rs +++ b/entities/src/login.rs @@ -1,6 +1,5 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.6 -use crate::serialization; use sea_orm::entity::prelude::*; use serde::{Deserialize, Serialize}; @@ -10,8 +9,7 @@ pub struct Model { #[sea_orm(primary_key)] pub id: u64, #[sea_orm(unique)] - #[serde(serialize_with = "serialization::uuid_as_ulid")] - pub bearer_token: Uuid, + pub bearer_token: Vec, pub user_id: u64, pub expires_at: DateTimeUtc, } diff --git a/entities/src/mod.rs b/entities/src/mod.rs index 5325e16d..4bfeb24d 100644 --- a/entities/src/mod.rs +++ b/entities/src/mod.rs @@ -1,7 +1,8 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.10.6 pub mod prelude; +pub mod admin; pub mod login; pub mod pending_login; pub mod revert_log; @@ -9,6 +10,5 @@ pub mod rpc_accounting; pub mod rpc_key; pub mod sea_orm_active_enums; pub mod secondary_user; -pub mod serialization; pub mod user; pub mod user_tier; diff --git a/migration/src/lib.rs b/migration/src/lib.rs index 0f221af2..e5ab068f 100644 --- a/migration/src/lib.rs +++ b/migration/src/lib.rs @@ -12,6 +12,7 @@ mod m20221101_222349_archive_request; mod m20221108_200345_save_anon_stats; mod m20221211_124002_request_method_privacy; mod m20221213_134158_move_login_into_database; +mod m20230117_191358_admin_table; pub struct Migrator; @@ -31,6 +32,7 @@ impl MigratorTrait for Migrator { Box::new(m20221108_200345_save_anon_stats::Migration), Box::new(m20221211_124002_request_method_privacy::Migration), Box::new(m20221213_134158_move_login_into_database::Migration), + Box::new(m20230117_191358_admin_table::Migration), ] } } diff --git a/migration/src/m20230117_191358_admin_table.rs b/migration/src/m20230117_191358_admin_table.rs new file mode 100644 index 00000000..2505d314 --- /dev/null +++ b/migration/src/m20230117_191358_admin_table.rs @@ -0,0 +1,58 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Replace the sample below with your own migration scripts + manager + .create_table( + Table::create() + .table(Admin::Table) + .col( + ColumnDef::new(Admin::Id) + .big_unsigned() + .not_null() + .auto_increment() + .primary_key(), + ) + .col( + ColumnDef::new(Admin::UserId) + .big_unsigned() + .unique_key() + .not_null() + ) + .foreign_key( + ForeignKey::create() + .name("fk-admin-user_id") + .from(Admin::Table, Admin::UserId) + .to(User::Table, User::Id), + ) + .to_owned(), + ) + .await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Replace the sample below with your own migration scripts + manager + .drop_table(Table::drop().table(Admin::Table).to_owned()) + .await + } +} + +/// Learn more at https://docs.rs/sea-query#iden +#[derive(Iden)] +enum User { + Table, + Id +} + +#[derive(Iden)] +enum Admin { + Table, + Id, + UserId, +} diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs new file mode 100644 index 00000000..c23e79a3 --- /dev/null +++ b/web3_proxy/src/admin_queries.rs @@ -0,0 +1,124 @@ +use crate::app::Web3ProxyApp; +use crate::frontend::errors::FrontendErrorResponse; +use crate::user_queries::get_user_id_from_params; +use anyhow::Context; +use axum::{ + Json, + headers::{authorization::Bearer, Authorization}, + TypedHeader, +}; +use axum::response::{IntoResponse, Response}; +use entities::{admin, user, user_tier}; +use ethers::prelude::Address; +use hashbrown::HashMap; +use http::StatusCode; +use migration::sea_orm::{self, IntoActiveModel}; +use log::info; + + +pub async fn query_admin_modify_usertier<'a>( + app: &'a Web3ProxyApp, + bearer: Option>>, + params: &'a HashMap +) -> Result { + + // Quickly return if any of the input tokens are bad + let user_address: Vec = params + .get("user_address") + .ok_or_else(|| + FrontendErrorResponse::StatusCode( + StatusCode::BAD_REQUEST, + "Unable to find user_address key in request".to_string(), + None, + ) + )? + .parse::
() + .map_err(|err| { + FrontendErrorResponse::StatusCode( + StatusCode::BAD_REQUEST, + "Unable to parse user_address as an Address".to_string(), + Some(err.into()), + ) + })? + .to_fixed_bytes().into(); + let user_tier_title = params + .get("user_tier_title") + .ok_or_else(|| FrontendErrorResponse::StatusCode( + StatusCode::BAD_REQUEST, + "Unable to get the user_tier_title key from the request".to_string(), + None, + ))?; + + // Prepare output body + let mut response_body = HashMap::new(); + response_body.insert( + "user_address", + serde_json::Value::String(user_address.into()), + ); + response_body.insert( + "user_tier_title", + serde_json::Value::String(user_tier_title.into()), + ); + + // Establish connections + let db_conn = app.db_conn().context("query_admin_modify_user needs a db")?; + let db_replica = app + .db_replica() + .context("query_user_stats needs a db replica")?; + let mut redis_conn = app + .redis_conn() + .await + .context("query_admin_modify_user had a redis connection error")? + .context("query_admin_modify_user needs a redis")?; + + // Try to get the user who is calling from redis (if existent) / else from the database + // TODO: Make a single query, where you retrieve the user, and directly from it the secondary user (otherwise we do two jumpy, which is unnecessary) + // get the user id first. if it is 0, we should use a cache on the app + let caller_id = get_user_id_from_params(&mut redis_conn, &db_conn, &db_replica, bearer, ¶ms).await?; + + // Check if the caller is an admin (i.e. if he is in an admin table) + let admin: admin::Model = admin::Entity::find() + .filter(admin::Entity::UserId.eq(caller_id)) + .one(&db_replica) + .await? + .context("This user is not registered as an admin")?; + + // If we are here, that means an admin was found, and we can safely proceed + + // Fetch the admin, and the user + let user: user::Model = user::Entity::find() + .filter(user::Column::Address.eq(user_address)) + .one(&db_replica) + .await? + .context("No user with this id found as the change")?; + // Return early if the target user_tier_id is the same as the original user_tier_id + response_body.insert( + "user_tier_title", + serde_json::Value::String(user.user_tier_id.into()), + ); + + // Now we can modify the user's tier + let new_user_tier: user_tier::Model = user_tier::Entity::find() + .filter(user_tier::Column::Title.eq(user_tier_title.clone())) + .one(&db_replica) + .await? 
+ .context("No user tier found with that name")?; + + if user.user_tier_id == new_user_tier.id { + info!("user already has that tier"); + } else { + let mut user = user.into_active_model(); + + user.user_tier_id = sea_orm::Set(new_user_tier.id); + + user.save(&db_conn).await?; + + info!("user's tier changed"); + } + + // Finally, remove the user from redis + // TODO: Also remove the user from the redis + + Ok(Json(&response_body).into_response()) + +} diff --git a/web3_proxy/src/frontend/mod.rs b/web3_proxy/src/frontend/mod.rs index b26e963d..092102a5 100644 --- a/web3_proxy/src/frontend/mod.rs +++ b/web3_proxy/src/frontend/mod.rs @@ -85,7 +85,7 @@ pub async fn serve(port: u16, proxy_app: Arc) -> anyhow::Result<() get(users::user_stats_aggregated_get), ) .route("/user/stats/detailed", get(users::user_stats_detailed_get)) - .route("/user/modify_role", get(users::admin_change_user_roles)) + .route("/admin/modify_role", get(users::admin_change_user_roles)) .route("/user/logout", post(users::user_logout_post)) .route("/status", get(status::status)) // layers are ordered bottom up diff --git a/web3_proxy/src/frontend/users.rs b/web3_proxy/src/frontend/users.rs index bd57e252..fdde4ade 100644 --- a/web3_proxy/src/frontend/users.rs +++ b/web3_proxy/src/frontend/users.rs @@ -41,6 +41,7 @@ use std::str::FromStr; use std::sync::Arc; use time::{Duration, OffsetDateTime}; use ulid::Ulid; +use crate::admin_queries::query_admin_modify_usertier; use crate::frontend::errors::FrontendErrorResponse; /// `GET /user/login/:user_address` or `GET /user/login/:user_address/:message_eip` -- Start the "Sign In with Ethereum" (siwe) login flow. @@ -871,119 +872,7 @@ pub async fn admin_change_user_roles( bearer: Option>>, Query(params): Query>, ) -> FrontendResult { + let response = query_admin_modify_usertier(&app, bearer, ¶ms).await?; - // Make sure that the bearer exists, and has admin rights ... - let user_address: Vec = params - .get("user_address") - .ok_or_else(|| - FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to find user_address key in request".to_string(), - None, - ) - )? - .parse::
() - .map_err(|err| { - FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to parse user_address as an Address".to_string(), - Some(err.into()), - ) - })? - .to_fixed_bytes().into(); - let user_tier_title = params - .get("user_tier_title") - .ok_or_else(|| FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to get the user_tier_title key from the request".to_string(), - None, - ))?; - - // Create database connections and all that - let db_conn = app.db_conn().context("admin_change_user_roles needs a db")?; - let db_replica = app - .db_replica() - .context("admin_change_user_roles needs a db replica")?; - let mut redis_conn = app - .redis_conn() - .await - .context("admin_change_user_roles had a redis connection error")? - .context("admin_change_user_roles needs a redis")?; - - // TODO: Make a single query, where you retrieve the user, and directly from it the secondary user (otherwise we do two jumpy, which is unnecessary) - // get the user id first. if it is 0, we should use a cache on the app - let user_id = get_user_id_from_params(&mut redis_conn, &db_conn, &db_replica, bearer, ¶ms).await?; - - let mut response_body = HashMap::new(); - response_body.insert( - "user_id", - serde_json::Value::Number(user_id.into()), - ); - - // Get both the user and user role - let user: user::Model = user::Entity::find_by_id(user_id) - .one(&db_conn) - .await? - .context("No user with this id found!")?; - // TODO: Let's connect the string, and find the previous string of the user id ... (this might be ok for now too thought) - response_body.insert( - "previous_tier", - serde_json::Value::Number(user.user_tier_id.into()), - ); - - // Modify the user-role ... - // Check if this use has admin privileges ... - let user_role: secondary_user::Model = secondary_user::Entity::find() - .filter(secondary_user::Column::UserId.eq(user_id)) - .one(&db_conn) - .await? - .context("No user tier found with that name")?; - println!("User role is: {:?}", user_role); - - // Return error if the user is not an admin or a user - match user_role.role { - Role::Owner | Role::Admin => { - // Change the user tier, we can copy a bunch of the functionality from the user-tier address - - // Check if all the required parameters are included in the request, if not, return an error - let user = user::Entity::find() - .filter(user::Column::Address.eq(user_address)) - .one(&db_conn) - .await? - .context("No user found with that key")?; - - // TODO: don't serialize the rpc key - debug!("user: {:#?}", user); - - let user_tier = user_tier::Entity::find() - .filter(user_tier::Column::Title.eq(user_tier_title.clone())) - .one(&db_conn) - .await? 
- .context("No user tier found with that name")?; - debug!("user_tier: {:#?}", user_tier); - - if user.user_tier_id == user_tier.id { - info!("user already has that tier"); - } else { - let mut user = user.into_active_model(); - - user.user_tier_id = sea_orm::Set(user_tier.id); - - user.save(&db_conn).await?; - - info!("user's tier changed"); - } - - } - Role::Collaborator => { - return Err(anyhow::anyhow!("you do not have admin rights!").into()); - } - }; - - response_body.insert( - "user_id", - serde_json::Value::Number(user_id.into()), - ); - let mut response = Json(&response_body).into_response(); - Ok(response) + response } diff --git a/web3_proxy/src/lib.rs b/web3_proxy/src/lib.rs index 0ae97055..7fc9ff97 100644 --- a/web3_proxy/src/lib.rs +++ b/web3_proxy/src/lib.rs @@ -1,5 +1,6 @@ pub mod app; pub mod app_stats; +pub mod admin_queries; pub mod block_number; pub mod config; pub mod frontend; diff --git a/web3_proxy/src/user_queries.rs b/web3_proxy/src/user_queries.rs index 6d2f5a94..f777f857 100644 --- a/web3_proxy/src/user_queries.rs +++ b/web3_proxy/src/user_queries.rs @@ -9,6 +9,7 @@ use axum::{ TypedHeader, }; use chrono::{NaiveDateTime, Utc}; +use ethers::prelude::Address; use entities::{login, rpc_accounting, rpc_key}; use hashbrown::HashMap; use http::StatusCode; From f0be397b09ca43ab46c9d1e968acc4ca1a558765 Mon Sep 17 00:00:00 2001 From: yenicelik Date: Tue, 17 Jan 2023 20:12:40 +0100 Subject: [PATCH 03/47] tiny refactoring and added admin entities. some bugs coming up --- entities/src/admin.rs | 1 + entities/src/mod.rs | 1 + web3_proxy/src/admin_queries.rs | 20 +++------- web3_proxy/src/frontend/admin.rs | 65 ++++++++++++++++++++++++++++++++ web3_proxy/src/frontend/mod.rs | 3 +- web3_proxy/src/frontend/users.rs | 20 ---------- 6 files changed, 75 insertions(+), 35 deletions(-) create mode 100644 web3_proxy/src/frontend/admin.rs diff --git a/entities/src/admin.rs b/entities/src/admin.rs index d1d46999..dc8a737d 100644 --- a/entities/src/admin.rs +++ b/entities/src/admin.rs @@ -1,5 +1,6 @@ //! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.6 +use crate::serialization; use sea_orm::entity::prelude::*; use serde::{Deserialize, Serialize}; diff --git a/entities/src/mod.rs b/entities/src/mod.rs index 4bfeb24d..f0ac2711 100644 --- a/entities/src/mod.rs +++ b/entities/src/mod.rs @@ -10,5 +10,6 @@ pub mod rpc_accounting; pub mod rpc_key; pub mod sea_orm_active_enums; pub mod secondary_user; +pub mod serialization; pub mod user; pub mod user_tier; diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs index c23e79a3..92f75d51 100644 --- a/web3_proxy/src/admin_queries.rs +++ b/web3_proxy/src/admin_queries.rs @@ -12,7 +12,7 @@ use entities::{admin, user, user_tier}; use ethers::prelude::Address; use hashbrown::HashMap; use http::StatusCode; -use migration::sea_orm::{self, IntoActiveModel}; +use migration::sea_orm::{self, ActiveModelTrait, ColumnTrait, EntityTrait, IntoActiveModel, QueryFilter}; use log::info; @@ -51,14 +51,6 @@ pub async fn query_admin_modify_usertier<'a>( // Prepare output body let mut response_body = HashMap::new(); - response_body.insert( - "user_address", - serde_json::Value::String(user_address.into()), - ); - response_body.insert( - "user_tier_title", - serde_json::Value::String(user_tier_title.into()), - ); // Establish connections let db_conn = app.db_conn().context("query_admin_modify_user needs a db")?; @@ -78,8 +70,8 @@ pub async fn query_admin_modify_usertier<'a>( // Check if the caller is an admin (i.e. 
if he is in an admin table) let admin: admin::Model = admin::Entity::find() - .filter(admin::Entity::UserId.eq(caller_id)) - .one(&db_replica) + .filter(admin::Column::UserId.eq(caller_id)) + .one(&db_conn) .await? .context("This user is not registered as an admin")?; @@ -88,19 +80,19 @@ pub async fn query_admin_modify_usertier<'a>( // Fetch the admin, and the user let user: user::Model = user::Entity::find() .filter(user::Column::Address.eq(user_address)) - .one(&db_replica) + .one(&db_conn) .await? .context("No user with this id found as the change")?; // Return early if the target user_tier_id is the same as the original user_tier_id response_body.insert( "user_tier_title", - serde_json::Value::String(user.user_tier_id.into()), + serde_json::Value::Number(user.user_tier_id.into()), ); // Now we can modify the user's tier let new_user_tier: user_tier::Model = user_tier::Entity::find() .filter(user_tier::Column::Title.eq(user_tier_title.clone())) - .one(&db_replica) + .one(&db_conn) .await? .context("No user tier found with that name")?; diff --git a/web3_proxy/src/frontend/admin.rs b/web3_proxy/src/frontend/admin.rs new file mode 100644 index 00000000..8f6273e3 --- /dev/null +++ b/web3_proxy/src/frontend/admin.rs @@ -0,0 +1,65 @@ +//! Handle admin helper logic + +use super::authorization::{login_is_authorized, RpcSecretKey}; +use super::errors::FrontendResult; +use crate::app::Web3ProxyApp; +use crate::user_queries::{get_page_from_params, get_user_id_from_params}; +use crate::user_queries::{ + get_chain_id_from_params, get_query_start_from_params, query_user_stats, StatResponse, +}; +use entities::prelude::{User, SecondaryUser}; +use crate::user_token::UserBearerToken; +use anyhow::Context; +use axum::headers::{Header, Origin, Referer, UserAgent}; +use axum::{ + extract::{Path, Query}, + headers::{authorization::Bearer, Authorization}, + response::IntoResponse, + Extension, Json, TypedHeader, +}; +use axum_client_ip::ClientIp; +use axum_macros::debug_handler; +use chrono::{TimeZone, Utc}; +use entities::sea_orm_active_enums::{LogLevel, Role}; +use entities::{login, pending_login, revert_log, rpc_key, secondary_user, user, user_tier}; +use ethers::{prelude::Address, types::Bytes}; +use hashbrown::HashMap; +use http::{HeaderValue, StatusCode}; +use ipnet::IpNet; +use itertools::Itertools; +use log::{debug, info, warn}; +use migration::sea_orm::prelude::Uuid; +use migration::sea_orm::{ + self, ActiveModelTrait, ColumnTrait, EntityTrait, IntoActiveModel, PaginatorTrait, QueryFilter, + QueryOrder, TransactionTrait, TryIntoModel, +}; +use serde::Deserialize; +use serde_json::json; +use siwe::{Message, VerificationOpts}; +use std::ops::Add; +use std::str::FromStr; +use std::sync::Arc; +use time::{Duration, OffsetDateTime}; +use ulid::Ulid; +use crate::admin_queries::query_admin_modify_usertier; +use crate::frontend::errors::FrontendErrorResponse; + +/// `GET /admin/modify_role` -- Use a bearer token to get the user's key stats such as bandwidth used and methods requested. +/// +/// If no bearer is provided, detailed stats for all users will be shown. +/// View a single user with `?user_id=$x`. +/// View a single chain with `?chain_id=$x`. +/// +/// Set `$x` to zero to see all. +/// +/// TODO: this will change as we add better support for secondary users. 
+#[debug_handler] +pub async fn admin_change_user_roles( + Extension(app): Extension>, + bearer: Option>>, + Query(params): Query>, +) -> FrontendResult { + let response = query_admin_modify_usertier(&app, bearer, ¶ms).await?; + + Ok(response) +} diff --git a/web3_proxy/src/frontend/mod.rs b/web3_proxy/src/frontend/mod.rs index 092102a5..8b8d8f86 100644 --- a/web3_proxy/src/frontend/mod.rs +++ b/web3_proxy/src/frontend/mod.rs @@ -1,5 +1,6 @@ //! `frontend` contains HTTP and websocket endpoints for use by users and admins. +pub mod admin; pub mod authorization; pub mod errors; // TODO: these are only public so docs are generated. What's a better way to do this? @@ -85,7 +86,7 @@ pub async fn serve(port: u16, proxy_app: Arc) -> anyhow::Result<() get(users::user_stats_aggregated_get), ) .route("/user/stats/detailed", get(users::user_stats_detailed_get)) - .route("/admin/modify_role", get(users::admin_change_user_roles)) + .route("/admin/modify_role", get(admin::admin_change_user_roles)) .route("/user/logout", post(users::user_logout_post)) .route("/status", get(status::status)) // layers are ordered bottom up diff --git a/web3_proxy/src/frontend/users.rs b/web3_proxy/src/frontend/users.rs index fdde4ade..2346be00 100644 --- a/web3_proxy/src/frontend/users.rs +++ b/web3_proxy/src/frontend/users.rs @@ -856,23 +856,3 @@ pub async fn user_stats_detailed_get( Ok(response) } - -/// `GET /user/stats/detailed` -- Use a bearer token to get the user's key stats such as bandwidth used and methods requested. -/// -/// If no bearer is provided, detailed stats for all users will be shown. -/// View a single user with `?user_id=$x`. -/// View a single chain with `?chain_id=$x`. -/// -/// Set `$x` to zero to see all. -/// -/// TODO: this will change as we add better support for secondary users. -#[debug_handler] -pub async fn admin_change_user_roles( - Extension(app): Extension>, - bearer: Option>>, - Query(params): Query>, -) -> FrontendResult { - let response = query_admin_modify_usertier(&app, bearer, ¶ms).await?; - - response -} From 95c4ba3273d87b1db56840cca0286407fca81e92 Mon Sep 17 00:00:00 2001 From: yenicelik Date: Tue, 17 Jan 2023 20:15:46 +0100 Subject: [PATCH 04/47] not sure why login was modified --- entities/src/login.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/entities/src/login.rs b/entities/src/login.rs index 82a78e42..1be72895 100644 --- a/entities/src/login.rs +++ b/entities/src/login.rs @@ -1,5 +1,6 @@ //! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.10.6 +use crate::serialization; use sea_orm::entity::prelude::*; use serde::{Deserialize, Serialize}; @@ -9,7 +10,8 @@ pub struct Model { #[sea_orm(primary_key)] pub id: u64, #[sea_orm(unique)] - pub bearer_token: Vec, + #[serde(serialize_with = "serialization::uuid_as_ulid")] + pub bearer_token: Uuid, pub user_id: u64, pub expires_at: DateTimeUtc, } From 74f5a27f0b4270b7f2d6189558f68d66667e9b7e Mon Sep 17 00:00:00 2001 From: yenicelik Date: Tue, 17 Jan 2023 21:44:16 +0100 Subject: [PATCH 05/47] added binary to change admin status of a user --- web3_proxy/src/admin_queries.rs | 4 ++ .../change_user_admin_status.rs | 64 +++++++++++++++++++ web3_proxy/src/bin/web3_proxy_cli/main.rs | 7 ++ 3 files changed, 75 insertions(+) create mode 100644 web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs index 92f75d51..32e0ab84 100644 --- a/web3_proxy/src/admin_queries.rs +++ b/web3_proxy/src/admin_queries.rs @@ -14,6 +14,7 @@ use hashbrown::HashMap; use http::StatusCode; use migration::sea_orm::{self, ActiveModelTrait, ColumnTrait, EntityTrait, IntoActiveModel, QueryFilter}; use log::info; +use redis_rate_limiter::redis::AsyncCommands; pub async fn query_admin_modify_usertier<'a>( @@ -110,6 +111,9 @@ pub async fn query_admin_modify_usertier<'a>( // Finally, remove the user from redis // TODO: Also remove the user from the redis + // redis_conn.zrem(); + // redis_conn.get::<_, u64>(&user.) // TODO: Where do i find the bearer token ... + Ok(Json(&response_body).into_response()) diff --git a/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs b/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs new file mode 100644 index 00000000..3058cb7e --- /dev/null +++ b/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs @@ -0,0 +1,64 @@ +use anyhow::Context; +use argh::FromArgs; +use entities::{admin, user}; +use ethers::types::Address; +use log::{debug, info}; +use migration::sea_orm::{ + self, ActiveModelTrait, ColumnTrait, DatabaseConnection, EntityTrait, ModelTrait, IntoActiveModel, + QueryFilter, +}; + +/// change a user's admin status. eiter they are an admin, or they aren't +#[derive(FromArgs, PartialEq, Eq, Debug)] +#[argh(subcommand, name = "change_admin_status")] +pub struct ChangeUserAdminStatusSubCommand { + /// the address of the user whose admin status you want to modify + #[argh(positional)] + address: String, + + /// true if the user should be an admin, false otherwise + #[argh(positional)] + should_be_admin: bool, +} + +impl ChangeUserAdminStatusSubCommand { + pub async fn main(self, db_conn: &DatabaseConnection) -> anyhow::Result<()> { + let address: Address = self.address.parse()?; + let should_be_admin: bool = self.should_be_admin; + + let address: Vec = address.to_fixed_bytes().into(); + + // Find user in database + let user = user::Entity::find() + .filter(user::Column::Address.eq(address.clone())) + .one(db_conn) + .await? 
+ .context("No user found with that address")?; + + // Check if there is a record in the database + let mut admin = admin::Entity::find() + .filter(admin::Column::UserId.eq(address)) + .all(db_conn) + .await?; + + debug!("user: {:#?}", user); + + match admin.pop() { + None if should_be_admin => { + // User is not an admin yet, but should be + let new_admin = admin::ActiveModel { + user_id: sea_orm::Set(user.id), + ..Default::default() + }; + new_admin.insert(db_conn).await?; + }, + Some(old_admin) if !should_be_admin => { + // User is already an admin, but shouldn't be + old_admin.delete(db_conn).await?; + }, + _ => {} + } + + Ok(()) + } +} diff --git a/web3_proxy/src/bin/web3_proxy_cli/main.rs b/web3_proxy/src/bin/web3_proxy_cli/main.rs index c60b9446..fa519162 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/main.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/main.rs @@ -1,4 +1,5 @@ mod change_user_address; +mod change_user_admin_status; mod change_user_tier; mod change_user_tier_by_address; mod change_user_tier_by_key; @@ -44,6 +45,7 @@ pub struct CliConfig { enum SubCommand { ChangeUserAddress(change_user_address::ChangeUserAddressSubCommand), ChangeUserTier(change_user_tier::ChangeUserTierSubCommand), + ChangeUserAdminStatus(change_user_admin_status::ChangeUserAdminStatusSubCommand), ChangeUserTierByAddress(change_user_tier_by_address::ChangeUserTierByAddressSubCommand), ChangeUserTierByKey(change_user_tier_by_key::ChangeUserTierByKeySubCommand), CheckConfig(check_config::CheckConfigSubCommand), @@ -100,6 +102,11 @@ async fn main() -> anyhow::Result<()> { x.main(&db_conn).await } + SubCommand::ChangeUserAdminStatus(x) => { + let db_conn = get_db(cli_config.db_url, 1, 1).await?; + + x.main(&db_conn).await + } SubCommand::ChangeUserTierByAddress(x) => { let db_conn = get_db(cli_config.db_url, 1, 1).await?; From 942865b6ac10fafa79b83badebe79986ce61dedd Mon Sep 17 00:00:00 2001 From: yenicelik Date: Wed, 18 Jan 2023 14:00:30 +0100 Subject: [PATCH 06/47] also removing login from cache. should add tests for all these cases --- .../bin/web3_proxy_cli/change_user_admin_status.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs b/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs index 3058cb7e..1be5ea86 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs @@ -1,12 +1,13 @@ use anyhow::Context; use argh::FromArgs; -use entities::{admin, user}; +use entities::{admin, login, user}; use ethers::types::Address; use log::{debug, info}; use migration::sea_orm::{ self, ActiveModelTrait, ColumnTrait, DatabaseConnection, EntityTrait, ModelTrait, IntoActiveModel, QueryFilter, }; +use web3_proxy::frontend::errors::FrontendErrorResponse; /// change a user's admin status. eiter they are an admin, or they aren't #[derive(FromArgs, PartialEq, Eq, Debug)] @@ -59,6 +60,14 @@ impl ChangeUserAdminStatusSubCommand { _ => {} } + // Remove any user logins from the database (incl. bearer tokens) + let delete_result = login::Entity::delete_many() + .filter(login::Column::UserId.eq(user.id)) + .exec(db_conn) + .await; + + debug!("cleared modified logins: {:?}", delete_result); + Ok(()) } } From 31788bb228d7071d1d5b2fd179ee030d83791ba9 Mon Sep 17 00:00:00 2001 From: yenicelik Date: Mon, 30 Jan 2023 17:45:24 +0100 Subject: [PATCH 07/47] imitating user. login flow still needs to check for read-only at every checkpoint. 
also referral logic can be removed from the login_post function --- entities/src/login.rs | 1 + entities/src/pending_login.rs | 1 + migration/src/lib.rs | 4 + .../m20230130_124740_read_only_login_logic.rs | 43 ++ ...65144_prepare_admin_imitation_pre_login.rs | 58 +++ scripts/apply-migrations.sh | 3 + web3_proxy/src/admin_queries.rs | 5 + web3_proxy/src/frontend/admin.rs | 443 +++++++++++++++++- web3_proxy/src/frontend/authorization.rs | 2 +- web3_proxy/src/frontend/mod.rs | 9 +- web3_proxy/src/frontend/users.rs | 21 +- web3_proxy/src/lib.rs | 21 + 12 files changed, 582 insertions(+), 29 deletions(-) create mode 100644 migration/src/m20230130_124740_read_only_login_logic.rs create mode 100644 migration/src/m20230130_165144_prepare_admin_imitation_pre_login.rs create mode 100644 scripts/apply-migrations.sh diff --git a/entities/src/login.rs b/entities/src/login.rs index 1be72895..f4af45a3 100644 --- a/entities/src/login.rs +++ b/entities/src/login.rs @@ -14,6 +14,7 @@ pub struct Model { pub bearer_token: Uuid, pub user_id: u64, pub expires_at: DateTimeUtc, + pub read_only: bool, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] diff --git a/entities/src/pending_login.rs b/entities/src/pending_login.rs index 2ec44223..196b851c 100644 --- a/entities/src/pending_login.rs +++ b/entities/src/pending_login.rs @@ -15,6 +15,7 @@ pub struct Model { #[sea_orm(column_type = "Text")] pub message: String, pub expires_at: DateTimeUtc, + pub imitating_user: Option, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] diff --git a/migration/src/lib.rs b/migration/src/lib.rs index 7c156bf4..7c0cbe0f 100644 --- a/migration/src/lib.rs +++ b/migration/src/lib.rs @@ -14,6 +14,8 @@ mod m20221211_124002_request_method_privacy; mod m20221213_134158_move_login_into_database; mod m20230117_191358_admin_table; mod m20230119_204135_better_free_tier; +mod m20230130_124740_read_only_login_logic; +mod m20230130_165144_prepare_admin_imitation_pre_login; pub struct Migrator; @@ -35,6 +37,8 @@ impl MigratorTrait for Migrator { Box::new(m20221213_134158_move_login_into_database::Migration), Box::new(m20230117_191358_admin_table::Migration), Box::new(m20230119_204135_better_free_tier::Migration), + Box::new(m20230130_124740_read_only_login_logic::Migration), + Box::new(m20230130_165144_prepare_admin_imitation_pre_login::Migration), ] } } diff --git a/migration/src/m20230130_124740_read_only_login_logic.rs b/migration/src/m20230130_124740_read_only_login_logic.rs new file mode 100644 index 00000000..2e47dec4 --- /dev/null +++ b/migration/src/m20230130_124740_read_only_login_logic.rs @@ -0,0 +1,43 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Replace the sample below with your own migration scripts + manager + .alter_table( + Table::alter() + .table(Alias::new("login")) + .add_column( + ColumnDef::new(Login::ReadOnly) + .boolean() + .not_null() + ).to_owned() + ).await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Replace the sample below with your own migration scripts + // Drop the column from the table ... 
+ manager + .alter_table( + Table::alter() + .table(Alias::new("login")) + .drop_column(Alias::new("read_only")) + .to_owned() + ).await + } +} + +/// Learn more at https://docs.rs/sea-query#iden +#[derive(Iden)] +enum Login { + Table, + Id, + BearerToken, + ReadOnly, + UserId, +} diff --git a/migration/src/m20230130_165144_prepare_admin_imitation_pre_login.rs b/migration/src/m20230130_165144_prepare_admin_imitation_pre_login.rs new file mode 100644 index 00000000..64a2a068 --- /dev/null +++ b/migration/src/m20230130_165144_prepare_admin_imitation_pre_login.rs @@ -0,0 +1,58 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Replace the sample below with your own migration scripts + manager + .alter_table( + Table::alter() + .table(Alias::new("pending_login")) + .add_column( + ColumnDef::new(PendingLogin::ImitatingUser) + .big_unsigned() + ) + .add_foreign_key(&TableForeignKey::new() + .name("fk-pending_login-imitating_user") + .from_tbl(PendingLogin::Table) + .to_tbl(User::Table) + .from_col(PendingLogin::ImitatingUser) + .to_col(User::Id) + ) + .to_owned() + ).await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Replace the sample below with your own migration scripts + manager + .alter_table( + Table::alter() + .table(Alias::new("pending_login")) + .drop_foreign_key(Alias::new("fk-pending_login-imitating_user")) + .drop_column(Alias::new("imitating_user")) + .to_owned() + ).await + } +} + +/// Learn more at https://docs.rs/sea-query#iden +#[derive(Iden)] +enum PendingLogin { + Table, + Id, + Nonce, + Message, + ExpiresAt, + ImitatingUser, +} + +/// Learn more at https://docs.rs/sea-query#iden +#[derive(Iden)] +enum User { + Table, + Id +} diff --git a/scripts/apply-migrations.sh b/scripts/apply-migrations.sh new file mode 100644 index 00000000..179ea9a5 --- /dev/null +++ b/scripts/apply-migrations.sh @@ -0,0 +1,3 @@ +sea-orm-cli migrate up + +# sea-orm-cli generate entity -t \ No newline at end of file diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs index 32e0ab84..58bd32e1 100644 --- a/web3_proxy/src/admin_queries.rs +++ b/web3_proxy/src/admin_queries.rs @@ -16,6 +16,11 @@ use migration::sea_orm::{self, ActiveModelTrait, ColumnTrait, EntityTrait, IntoA use log::info; use redis_rate_limiter::redis::AsyncCommands; +// TODO: Add some logic to check if the operating user is an admin +// If he is, return true +// If he is not, return false +// This function is used to give permission to certain users + pub async fn query_admin_modify_usertier<'a>( app: &'a Web3ProxyApp, diff --git a/web3_proxy/src/frontend/admin.rs b/web3_proxy/src/frontend/admin.rs index 8f6273e3..e97f58b6 100644 --- a/web3_proxy/src/frontend/admin.rs +++ b/web3_proxy/src/frontend/admin.rs @@ -21,7 +21,7 @@ use axum_client_ip::ClientIp; use axum_macros::debug_handler; use chrono::{TimeZone, Utc}; use entities::sea_orm_active_enums::{LogLevel, Role}; -use entities::{login, pending_login, revert_log, rpc_key, secondary_user, user, user_tier}; +use entities::{admin, login, pending_login, revert_log, rpc_key, secondary_user, user, user_tier}; use ethers::{prelude::Address, types::Bytes}; use hashbrown::HashMap; use http::{HeaderValue, StatusCode}; @@ -43,16 +43,12 @@ use time::{Duration, OffsetDateTime}; use ulid::Ulid; use crate::admin_queries::query_admin_modify_usertier; use 
crate::frontend::errors::FrontendErrorResponse; +use crate::{PostLogin, PostLoginQuery}; -/// `GET /admin/modify_role` -- Use a bearer token to get the user's key stats such as bandwidth used and methods requested. +/// `GET /admin/modify_role` -- As an admin, modify a user's user-tier /// -/// If no bearer is provided, detailed stats for all users will be shown. -/// View a single user with `?user_id=$x`. -/// View a single chain with `?chain_id=$x`. -/// -/// Set `$x` to zero to see all. -/// -/// TODO: this will change as we add better support for secondary users. +/// - user_address that is to be modified +/// - user_role_tier that is supposed to be adapted #[debug_handler] pub async fn admin_change_user_roles( Extension(app): Extension>, @@ -63,3 +59,432 @@ pub async fn admin_change_user_roles( Ok(response) } + +/// `GET /admin/login/:user_address` -- Being an admin, login as a user in read-only mode +/// +/// - user_address that is to be logged in by +/// We assume that the admin has already logged in, and has a bearer token ... +#[debug_handler] +pub async fn admin_login_get( + Extension(app): Extension>, + ClientIp(ip): ClientIp, + Path(mut params): Path>, +) -> FrontendResult { + // First check if the login is authorized + login_is_authorized(&app, ip).await?; + + // create a message and save it in redis + // TODO: how many seconds? get from config? + + // Same parameters as when someone logs in as a user + let expire_seconds: usize = 20 * 60; + let nonce = Ulid::new(); + let issued_at = OffsetDateTime::now_utc(); + let expiration_time = issued_at.add(Duration::new(expire_seconds as i64, 0)); + + // The admin user is the one that basically logs in, on behalf of the user + // This will generate a login id for the admin, which we will be caching ... + // I suppose with this, the admin can be logged in to one session at a time + // let (caller, _semaphore) = app.bearer_is_authorized(bearer_token).await?; + + // Finally, check if the user is an admin. If he is, return "true" as the third triplet. + // TODO: consider wrapping the output in a struct, instead of a triplet + // TODO: Could try to merge this into the above query ... + // This query will fail if it's not the admin... + + // get the admin field ... + let admin_address: Address = params + .get("admin_address") + .ok_or_else(|| + FrontendErrorResponse::StatusCode( + StatusCode::BAD_REQUEST, + "Unable to find admin_address key in request".to_string(), + None, + ) + )? + .parse::
<Address>()
+        .map_err(|err| {
+            FrontendErrorResponse::StatusCode(
+                StatusCode::BAD_REQUEST,
+                "Unable to parse admin_address as an Address".to_string(),
+                Some(err.into()),
+            )
+        })?;
+
+
+    // Fetch the user_address parameter from the request (the user the admin wants to log in as)
+    let user_address: Vec<u8> = params
+        .get("user_address")
+        .ok_or_else(||
+            FrontendErrorResponse::StatusCode(
+                StatusCode::BAD_REQUEST,
+                "Unable to find user_address key in request".to_string(),
+                None,
+            )
+        )?
+        .parse::<Address>
() + .map_err(|err| { + FrontendErrorResponse::StatusCode( + StatusCode::BAD_REQUEST, + "Unable to parse user_address as an Address".to_string(), + Some(err.into()), + ) + })? + .to_fixed_bytes().into(); + + // We want to login to llamanodes.com + let login_domain = app + .config + .login_domain + .clone() + .unwrap_or_else(|| "llamanodes.com".to_string()); + + // Also there must basically be a token, that says that one admin logins _as a user_. + // I'm not yet fully sure how to handle with that logic specifically ... + // TODO: get most of these from the app config + // TODO: Let's check again who the message needs to be signed by; + // if the message does not have to be signed by the user, include the user ... + let message = Message { + // TODO: don't unwrap + // TODO: accept a login_domain from the request? + domain: login_domain.parse().unwrap(), + // In the case of the admin, the admin needs to sign the message, so we include this logic ... + address: admin_address.to_fixed_bytes(),// user_address.to_fixed_bytes(), + // TODO: config for statement + statement: Some("🦙🦙🦙🦙🦙".to_string()), + // TODO: don't unwrap + uri: format!("https://{}/", login_domain).parse().unwrap(), + version: siwe::Version::V1, + chain_id: 1, + expiration_time: Some(expiration_time.into()), + issued_at: issued_at.into(), + nonce: nonce.to_string(), + not_before: None, + request_id: None, + resources: vec![], + }; + + let db_conn = app.db_conn().context("login requires a database")?; + let db_replica = app.db_replica().context("login requires a replica database")?; + + // Get the user that we want to imitate from the read-only database (their id ...) + // TODO: Only get the id, not the whole user object ... + let user = user::Entity::find() + .filter(user::Column::Address.eq(user_address)) + .one(db_replica.conn()) + .await? + .context("fetching admin from db by user_id")?; + + + // Can there be two login-sessions at the same time? + // I supposed if the user logs in, the admin would be logged out and vice versa + + // massage types to fit in the database. sea-orm does not make this very elegant + let uuid = Uuid::from_u128(nonce.into()); + // we add 1 to expire_seconds just to be sure the database has the key for the full expiration_time + let expires_at = Utc + .timestamp_opt(expiration_time.unix_timestamp() + 1, 0) + .unwrap(); + + // we do not store a maximum number of attempted logins. anyone can request so we don't want to allow DOS attacks + // add a row to the database for this user + let user_pending_login = pending_login::ActiveModel { + id: sea_orm::NotSet, + nonce: sea_orm::Set(uuid), + message: sea_orm::Set(message.to_string()), + expires_at: sea_orm::Set(expires_at), + imitating_user: sea_orm::Set(Some(user.id)) + }; + + user_pending_login + .save(&db_conn) + .await + .context("saving user's pending_login")?; + + // there are multiple ways to sign messages and not all wallets support them + // TODO: default message eip from config? 
+ let message_eip = params + .remove("message_eip") + .unwrap_or_else(|| "eip4361".to_string()); + + let message: String = match message_eip.as_str() { + "eip191_bytes" => Bytes::from(message.eip191_bytes().unwrap()).to_string(), + "eip191_hash" => Bytes::from(&message.eip191_hash().unwrap()).to_string(), + "eip4361" => message.to_string(), + _ => { + // TODO: custom error that is handled a 401 + return Err(anyhow::anyhow!("invalid message eip given").into()); + } + }; + + Ok(message.into_response()) +} + +/// `POST /admin/login` - Register or login by posting a signed "siwe" message +/// It is recommended to save the returned bearer token in a cookie. +/// The bearer token can be used to authenticate other requests, such as getting user user's tats or modifying the user's profile +#[debug_handler] +pub async fn admin_login_post( + Extension(app): Extension>, + ClientIp(ip): ClientIp, + Query(query): Query, + Json(payload): Json, +) -> FrontendResult { + login_is_authorized(&app, ip).await?; + + // Check for the signed bytes .. + // TODO: this seems too verbose. how can we simply convert a String into a [u8; 65] + let their_sig_bytes = Bytes::from_str(&payload.sig).context("parsing sig")?; + if their_sig_bytes.len() != 65 { + return Err(anyhow::anyhow!("checking signature length").into()); + } + let mut their_sig: [u8; 65] = [0; 65]; + for x in 0..65 { + their_sig[x] = their_sig_bytes[x] + } + + // we can't trust that they didn't tamper with the message in some way. like some clients return it hex encoded + // TODO: checking 0x seems fragile, but I think it will be fine. siwe message text shouldn't ever start with 0x + let their_msg: Message = if payload.msg.starts_with("0x") { + let their_msg_bytes = Bytes::from_str(&payload.msg).context("parsing payload message")?; + + // TODO: lossy or no? + String::from_utf8_lossy(their_msg_bytes.as_ref()) + .parse::() + .context("parsing hex string message")? + } else { + payload + .msg + .parse::() + .context("parsing string message")? + }; + + // the only part of the message we will trust is their nonce + // TODO: this is fragile. have a helper function/struct for redis keys + let login_nonce = UserBearerToken::from_str(&their_msg.nonce)?; + + // fetch the message we gave them from our database + let db_replica = app.db_replica().context("Getting database connection")?; + + // massage type for the db + let login_nonce_uuid: Uuid = login_nonce.clone().into(); + + // TODO: Here we will need to re-find the parameter where the admin wants to log-in as the user ... + let user_pending_login = pending_login::Entity::find() + .filter(pending_login::Column::Nonce.eq(login_nonce_uuid)) + .one(db_replica.conn()) + .await + .context("database error while finding pending_login")? + .context("login nonce not found")?; + + let our_msg: siwe::Message = user_pending_login + .message + .parse() + .context("parsing siwe message")?; + + // default options are fine. the message includes timestamp and domain and nonce + let verify_config = VerificationOpts::default(); + + if let Err(err_1) = our_msg + .verify(&their_sig, &verify_config) + .await + .context("verifying signature against our local message") + { + // verification method 1 failed. try eip191 + if let Err(err_191) = our_msg + .verify_eip191(&their_sig) + .context("verifying eip191 signature against our local message") + { + let db_conn = app + .db_conn() + .context("deleting expired pending logins requires a db")?; + + // delete ALL expired rows. 
+ let now = Utc::now(); + let delete_result = pending_login::Entity::delete_many() + .filter(pending_login::Column::ExpiresAt.lte(now)) + .exec(&db_conn) + .await?; + + // TODO: emit a stat? if this is high something weird might be happening + debug!("cleared expired pending_logins: {:?}", delete_result); + + return Err(anyhow::anyhow!( + "both the primary and eip191 verification failed: {:#?}; {:#?}", + err_1, + err_191 + ) + .into()); + } + } + + // TODO: Maybe add a context? + let imitating_user = user_pending_login.imitating_user.address?; + + // TODO: limit columns or load whole user? + // TODO: Right now this loads the whole admin. I assume we might want to load the user though (?) figure this out as we go along... + let admin = user::Entity::find() + .filter(user::Column::Address.eq(our_msg.address.as_ref())) + .one(db_replica.conn()) + .await + .unwrap(); + + let u = user::Entity::find() + .filter(user::Column::Address.eq(imitating_user_address.as_ref())) + .one(db_replica.conn()) + .await + .unwrap(); + + let db_conn = app.db_conn().context("login requires a db")?; + + let (u, uks, status_code) = match u { + None => { + // user does not exist yet + + // check the invite code + // TODO: more advanced invite codes that set different request/minute and concurrency limits + if let Some(invite_code) = &app.config.invite_code { + if query.invite_code.as_ref() != Some(invite_code) { + return Err(anyhow::anyhow!("checking invite_code").into()); + } + } + + let txn = db_conn.begin().await?; + + // the only thing we need from them is an address + // everything else is optional + // TODO: different invite codes should allow different levels + // TODO: maybe decrement a count on the invite code? + let u = user::ActiveModel { + address: sea_orm::Set(our_msg.address.into()), + ..Default::default() + }; + + let u = u.insert(&txn).await?; + + // create the user's first api key + let rpc_secret_key = RpcSecretKey::new(); + + let uk = rpc_key::ActiveModel { + user_id: sea_orm::Set(u.id), + secret_key: sea_orm::Set(rpc_secret_key.into()), + description: sea_orm::Set(None), + ..Default::default() + }; + + let uk = uk + .insert(&txn) + .await + .context("Failed saving new user key")?; + + let uks = vec![uk]; + + // save the user and key to the database + txn.commit().await?; + + (u, uks, StatusCode::CREATED) + } + Some(u) => { + // the user is already registered + let uks = rpc_key::Entity::find() + .filter(rpc_key::Column::UserId.eq(u.id)) + .all(db_replica.conn()) + .await + .context("failed loading user's key")?; + + (u, uks, StatusCode::OK) + } + }; + + // create a bearer token for the user. + let user_bearer_token = UserBearerToken::default(); + + // json response with everything in it + // we could return just the bearer token, but I think they will always request api keys and the user profile + let response_json = json!({ + "rpc_keys": uks + .into_iter() + .map(|uk| (uk.id, uk)) + .collect::>(), + "bearer_token": user_bearer_token, + "user": u, + }); + + let response = (status_code, Json(response_json)).into_response(); + + // add bearer to the database + + // expire in 2 days, because this is more critical (and shouldn't need to be done so long!) + let expires_at = Utc::now() + .checked_add_signed(chrono::Duration::days(2)) + .unwrap(); + + // TODO: Here, the bearer token should include a message + // TODO: Above, make sure that the calling address is an admin! 
+ // TODO: Above, make sure that the signed is the admin (address field), + // but then in this request, the admin can pick which user to sign up as + let user_login = login::ActiveModel { + id: sea_orm::NotSet, + bearer_token: sea_orm::Set(user_bearer_token.uuid()), + user_id: sea_orm::Set(u.id), // Yes, this should be the user ... because the rest of the applications takes this item, from the initial user + expires_at: sea_orm::Set(expires_at), + read_only: sea_orm::Set(true) + }; + + user_login + .save(&db_conn) + .await + .context("saving user login")?; + + if let Err(err) = user_pending_login + .into_active_model() + .delete(&db_conn) + .await + { + warn!("Failed to delete nonce:{}: {}", login_nonce.0, err); + } + + Ok(response) + +} + +// TODO: This is basically an exact copy of the user endpoint, I should probabl refactor this code ... +/// `POST /admin/imitate-logout` - Forget the bearer token in the `Authentication` header. +#[debug_handler] +pub async fn admin_logout_post( + Extension(app): Extension>, + TypedHeader(Authorization(bearer)): TypedHeader>, +) -> FrontendResult { + let user_bearer = UserBearerToken::try_from(bearer)?; + + let db_conn = app.db_conn().context("database needed for user logout")?; + + if let Err(err) = login::Entity::delete_many() + .filter(login::Column::BearerToken.eq(user_bearer.uuid())) + .exec(&db_conn) + .await + { + debug!("Failed to delete {}: {}", user_bearer.redis_key(), err); + } + + let now = Utc::now(); + + // also delete any expired logins + let delete_result = login::Entity::delete_many() + .filter(login::Column::ExpiresAt.lte(now)) + .exec(&db_conn) + .await; + + debug!("Deleted expired logins: {:?}", delete_result); + + // also delete any expired pending logins + let delete_result = login::Entity::delete_many() + .filter(login::Column::ExpiresAt.lte(now)) + .exec(&db_conn) + .await; + + debug!("Deleted expired pending logins: {:?}", delete_result); + + // TODO: what should the response be? 
probably json something + Ok("goodbye".into_response()) +} diff --git a/web3_proxy/src/frontend/authorization.rs b/web3_proxy/src/frontend/authorization.rs index 8c9380da..fc9308ef 100644 --- a/web3_proxy/src/frontend/authorization.rs +++ b/web3_proxy/src/frontend/authorization.rs @@ -9,7 +9,7 @@ use axum::headers::authorization::Bearer; use axum::headers::{Header, Origin, Referer, UserAgent}; use chrono::Utc; use deferred_rate_limiter::DeferredRateLimitResult; -use entities::{login, rpc_key, user, user_tier}; +use entities::{admin, login, rpc_key, user, user_tier}; use ethers::types::Bytes; use ethers::utils::keccak256; use futures::TryFutureExt; diff --git a/web3_proxy/src/frontend/mod.rs b/web3_proxy/src/frontend/mod.rs index fad68a8f..dae34033 100644 --- a/web3_proxy/src/frontend/mod.rs +++ b/web3_proxy/src/frontend/mod.rs @@ -160,8 +160,15 @@ pub async fn serve(port: u16, proxy_app: Arc) -> anyhow::Result<() get(users::user_stats_aggregated_get), ) .route("/user/stats/detailed", get(users::user_stats_detailed_get)) - .route("/admin/modify_role", get(admin::admin_change_user_roles)) .route("/user/logout", post(users::user_logout_post)) + .route("/admin/modify_role", get(admin::admin_change_user_roles)) + .route("/admin/imitate-login/:user_address", get(admin::admin_login_get)) + .route( + "/user/imitate-login/:user_address/:message_eip", + get(admin::admin_login_get), + ) + .route("/admin/imitate-login", post(admin::admin_login_post)) + .route("/admin/imitate-logout", post(admin::admin_login_post)) // // Axum layers // layers are ordered bottom up diff --git a/web3_proxy/src/frontend/users.rs b/web3_proxy/src/frontend/users.rs index 2346be00..9bed230a 100644 --- a/web3_proxy/src/frontend/users.rs +++ b/web3_proxy/src/frontend/users.rs @@ -43,6 +43,7 @@ use time::{Duration, OffsetDateTime}; use ulid::Ulid; use crate::admin_queries::query_admin_modify_usertier; use crate::frontend::errors::FrontendErrorResponse; +use crate::{PostLogin, PostLoginQuery}; /// `GET /user/login/:user_address` or `GET /user/login/:user_address/:message_eip` -- Start the "Sign In with Ethereum" (siwe) login flow. /// @@ -131,6 +132,7 @@ pub async fn user_login_get( nonce: sea_orm::Set(uuid), message: sea_orm::Set(message.to_string()), expires_at: sea_orm::Set(expires_at), + imitating_user: sea_orm::Set(None) }; user_pending_login @@ -157,24 +159,6 @@ pub async fn user_login_get( Ok(message.into_response()) } -/// Query params for our `post_login` handler. -#[derive(Debug, Deserialize)] -pub struct PostLoginQuery { - /// While we are in alpha/beta, we require users to supply an invite code. - /// The invite code (if any) is set in the application's config. - /// This may eventually provide some sort of referral bonus. - pub invite_code: Option, -} - -/// JSON body to our `post_login` handler. -/// Currently only siwe logins that send an address, msg, and sig are allowed. -/// Email/password and other login methods are planned. -#[derive(Debug, Deserialize)] -pub struct PostLogin { - sig: String, - msg: String, -} - /// `POST /user/login` - Register or login by posting a signed "siwe" message. /// It is recommended to save the returned bearer token in a cookie. /// The bearer token can be used to authenticate other requests, such as getting the user's stats or modifying the user's profile. 
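
For reference, the imitate-login routes registered above can be driven end to end from a client. A hedged sketch using reqwest and an ethers LocalWallet, assuming the proxy listens on 127.0.0.1:8544 (as the manual test scripts later in this series do); ADMIN_PRIVATE_KEY is a placeholder, the msg/sig field names come from PostLogin, the bearer_token key comes from the login response JSON, and whether msg must be hex-encoded (as in the curl docs added later) is left as an assumption here:

    use ethers::signers::{LocalWallet, Signer};
    use serde_json::{json, Value};

    /// Illustrative client flow: fetch the siwe message for the user being imitated,
    /// sign it with the admin's key, and post it back to get a read-only bearer token.
    async fn admin_imitate_login(user_address: &str) -> anyhow::Result<String> {
        let client = reqwest::Client::new();

        // 1. GET the login message for the target user
        let message = client
            .get(format!(
                "http://127.0.0.1:8544/admin/imitate-login/{}",
                user_address
            ))
            .send()
            .await?
            .text()
            .await?;

        // 2. sign it with the admin wallet (EIP-191 personal_sign, as siwe expects)
        let wallet: LocalWallet = std::env::var("ADMIN_PRIVATE_KEY")?.parse()?;
        let sig = wallet.sign_message(message.as_bytes()).await?;

        // 3. POST the signed message; the JSON response carries the bearer token
        let resp: Value = client
            .post("http://127.0.0.1:8544/admin/imitate-login")
            .json(&json!({ "msg": message, "sig": sig.to_string() }))
            .send()
            .await?
            .json()
            .await?;

        Ok(resp["bearer_token"].as_str().unwrap_or_default().to_owned())
    }
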
@@ -368,6 +352,7 @@ pub async fn user_login_post( bearer_token: sea_orm::Set(user_bearer_token.uuid()), user_id: sea_orm::Set(u.id), expires_at: sea_orm::Set(expires_at), + read_only: sea_orm::Set(false) }; user_login diff --git a/web3_proxy/src/lib.rs b/web3_proxy/src/lib.rs index c9770319..5c8e74ea 100644 --- a/web3_proxy/src/lib.rs +++ b/web3_proxy/src/lib.rs @@ -11,3 +11,24 @@ pub mod pagerduty; pub mod rpcs; pub mod user_queries; pub mod user_token; + +use serde::Deserialize; + +// Push some commonly used types here. Can establish a folder later on +/// Query params for our `post_login` handler. +#[derive(Debug, Deserialize)] +pub struct PostLoginQuery { + /// While we are in alpha/beta, we require users to supply an invite code. + /// The invite code (if any) is set in the application's config. + /// This may eventually provide some sort of referral bonus. + invite_code: Option, +} + +/// JSON body to our `post_login` handler. +/// Currently only siwe logins that send an address, msg, and sig are allowed. +/// Email/password and other login methods are planned. +#[derive(Debug, Deserialize)] +pub struct PostLogin { + sig: String, + msg: String, +} From fadb61114a87cd52006e4f4b6829a34fea613cf3 Mon Sep 17 00:00:00 2001 From: yenicelik Date: Mon, 30 Jan 2023 20:02:28 +0100 Subject: [PATCH 08/47] should write some tests next --- web3_proxy/src/frontend/admin.rs | 95 ++++++++------------------------ 1 file changed, 24 insertions(+), 71 deletions(-) diff --git a/web3_proxy/src/frontend/admin.rs b/web3_proxy/src/frontend/admin.rs index e97f58b6..c15a103f 100644 --- a/web3_proxy/src/frontend/admin.rs +++ b/web3_proxy/src/frontend/admin.rs @@ -319,82 +319,32 @@ pub async fn admin_login_post( } // TODO: Maybe add a context? - let imitating_user = user_pending_login.imitating_user.address?; + let imitating_user_id = user_pending_login.imitating_user + .context("getting address of the imitating user")?; // TODO: limit columns or load whole user? // TODO: Right now this loads the whole admin. I assume we might want to load the user though (?) figure this out as we go along... let admin = user::Entity::find() .filter(user::Column::Address.eq(our_msg.address.as_ref())) .one(db_replica.conn()) - .await - .unwrap(); + .await? + .context("getting admin address")?; - let u = user::Entity::find() - .filter(user::Column::Address.eq(imitating_user_address.as_ref())) + let imitating_user = user::Entity::find() + .filter(user::Column::Id.eq(imitating_user_id)) .one(db_replica.conn()) + .await? + .context("admin address was not found!")?; + + // I supposed we also get the rpc_key, whatever this is used for (?). + // I think the RPC key should still belong to the admin though in this case ... 
+ + // the user is already registered + let admin_rpc_key = rpc_key::Entity::find() + .filter(rpc_key::Column::UserId.eq(admin.id)) + .all(db_replica.conn()) .await - .unwrap(); - - let db_conn = app.db_conn().context("login requires a db")?; - - let (u, uks, status_code) = match u { - None => { - // user does not exist yet - - // check the invite code - // TODO: more advanced invite codes that set different request/minute and concurrency limits - if let Some(invite_code) = &app.config.invite_code { - if query.invite_code.as_ref() != Some(invite_code) { - return Err(anyhow::anyhow!("checking invite_code").into()); - } - } - - let txn = db_conn.begin().await?; - - // the only thing we need from them is an address - // everything else is optional - // TODO: different invite codes should allow different levels - // TODO: maybe decrement a count on the invite code? - let u = user::ActiveModel { - address: sea_orm::Set(our_msg.address.into()), - ..Default::default() - }; - - let u = u.insert(&txn).await?; - - // create the user's first api key - let rpc_secret_key = RpcSecretKey::new(); - - let uk = rpc_key::ActiveModel { - user_id: sea_orm::Set(u.id), - secret_key: sea_orm::Set(rpc_secret_key.into()), - description: sea_orm::Set(None), - ..Default::default() - }; - - let uk = uk - .insert(&txn) - .await - .context("Failed saving new user key")?; - - let uks = vec![uk]; - - // save the user and key to the database - txn.commit().await?; - - (u, uks, StatusCode::CREATED) - } - Some(u) => { - // the user is already registered - let uks = rpc_key::Entity::find() - .filter(rpc_key::Column::UserId.eq(u.id)) - .all(db_replica.conn()) - .await - .context("failed loading user's key")?; - - (u, uks, StatusCode::OK) - } - }; + .context("failed loading user's key")?; // create a bearer token for the user. let user_bearer_token = UserBearerToken::default(); @@ -402,15 +352,16 @@ pub async fn admin_login_post( // json response with everything in it // we could return just the bearer token, but I think they will always request api keys and the user profile let response_json = json!({ - "rpc_keys": uks + "rpc_keys": admin_rpc_key .into_iter() .map(|uk| (uk.id, uk)) .collect::>(), "bearer_token": user_bearer_token, - "user": u, + "imitating_user": imitating_user, + "admin_user": admin, }); - let response = (status_code, Json(response_json)).into_response(); + let response = (StatusCode::OK, Json(response_json)).into_response(); // add bearer to the database @@ -426,11 +377,13 @@ pub async fn admin_login_post( let user_login = login::ActiveModel { id: sea_orm::NotSet, bearer_token: sea_orm::Set(user_bearer_token.uuid()), - user_id: sea_orm::Set(u.id), // Yes, this should be the user ... because the rest of the applications takes this item, from the initial user + user_id: sea_orm::Set(imitating_user.id), // Yes, this should be the user ... 
because the rest of the applications takes this item, from the initial user expires_at: sea_orm::Set(expires_at), read_only: sea_orm::Set(true) }; + let db_conn = app.db_conn().context("Getting database connection")?; + user_login .save(&db_conn) .await From d055febc2ed5084ee8de5522ac7240a8969151cf Mon Sep 17 00:00:00 2001 From: yenicelik Date: Tue, 31 Jan 2023 14:55:39 +0100 Subject: [PATCH 09/47] short script to install nextest --- scripts/install-test-suite.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 scripts/install-test-suite.sh diff --git a/scripts/install-test-suite.sh b/scripts/install-test-suite.sh new file mode 100644 index 00000000..4ff5e09d --- /dev/null +++ b/scripts/install-test-suite.sh @@ -0,0 +1,3 @@ +cargo install cargo-binstall +cargo install cargo-binstall +# cargo nextest run \ No newline at end of file From 4582cf3e37af5859fc66b80ab80c48e12acadbb9 Mon Sep 17 00:00:00 2001 From: yenicelik Date: Tue, 31 Jan 2023 15:08:46 +0100 Subject: [PATCH 10/47] should next write some simple end2end test --- .../m20230130_124740_read_only_login_logic.rs | 9 ++++----- ..._165144_prepare_admin_imitation_pre_login.rs | 8 +++----- web3_proxy/src/admin_queries.rs | 17 +++++++++++++---- web3_proxy/src/bin/web3_proxy_cli/main.rs | 2 +- web3_proxy/src/frontend/admin.rs | 7 +++++-- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/migration/src/m20230130_124740_read_only_login_logic.rs b/migration/src/m20230130_124740_read_only_login_logic.rs index 2e47dec4..064dc683 100644 --- a/migration/src/m20230130_124740_read_only_login_logic.rs +++ b/migration/src/m20230130_124740_read_only_login_logic.rs @@ -6,11 +6,11 @@ pub struct Migration; #[async_trait::async_trait] impl MigrationTrait for Migration { async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts + // Add a read-only column to the table manager .alter_table( Table::alter() - .table(Alias::new("login")) + .table(Login::Table) .add_column( ColumnDef::new(Login::ReadOnly) .boolean() @@ -20,13 +20,12 @@ impl MigrationTrait for Migration { } async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts // Drop the column from the table ... 
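
The `Login::Table` and `Login::ReadOnly` identifiers used by this migration assume an `Iden` enum defined alongside it. A minimal sketch of what that enum typically looks like in sea-orm-migration (the derive snake-cases the variants, so `Table` maps to "login" and `ReadOnly` to "read_only"); this is an assumption for illustration, not part of the patch:

    use sea_orm_migration::prelude::*;

    // Identifiers referenced by the up()/down() alter_table calls in this migration.
    #[derive(Iden)]
    enum Login {
        Table,
        ReadOnly,
    }
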
manager .alter_table( Table::alter() - .table(Alias::new("login")) - .drop_column(Alias::new("read_only")) + .table(Login::Table) + .drop_column(Login::ReadOnly) .to_owned() ).await } diff --git a/migration/src/m20230130_165144_prepare_admin_imitation_pre_login.rs b/migration/src/m20230130_165144_prepare_admin_imitation_pre_login.rs index 64a2a068..ff6ec868 100644 --- a/migration/src/m20230130_165144_prepare_admin_imitation_pre_login.rs +++ b/migration/src/m20230130_165144_prepare_admin_imitation_pre_login.rs @@ -6,11 +6,10 @@ pub struct Migration; #[async_trait::async_trait] impl MigrationTrait for Migration { async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts manager .alter_table( Table::alter() - .table(Alias::new("pending_login")) + .table(PendingLogin::Table) .add_column( ColumnDef::new(PendingLogin::ImitatingUser) .big_unsigned() @@ -27,13 +26,12 @@ impl MigrationTrait for Migration { } async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts manager .alter_table( Table::alter() - .table(Alias::new("pending_login")) + .table(PendingLogin::Table) .drop_foreign_key(Alias::new("fk-pending_login-imitating_user")) - .drop_column(Alias::new("imitating_user")) + .drop_column(PendingLogin::ImitatingUser) .to_owned() ).await } diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs index 58bd32e1..83305808 100644 --- a/web3_proxy/src/admin_queries.rs +++ b/web3_proxy/src/admin_queries.rs @@ -15,6 +15,7 @@ use http::StatusCode; use migration::sea_orm::{self, ActiveModelTrait, ColumnTrait, EntityTrait, IntoActiveModel, QueryFilter}; use log::info; use redis_rate_limiter::redis::AsyncCommands; +use crate::frontend::errors::FrontendErrorResponse::AccessDenied; // TODO: Add some logic to check if the operating user is an admin // If he is, return true @@ -79,7 +80,7 @@ pub async fn query_admin_modify_usertier<'a>( .filter(admin::Column::UserId.eq(caller_id)) .one(&db_conn) .await? - .context("This user is not registered as an admin")?; + .ok_or(AccessDenied.into())?; // If we are here, that means an admin was found, and we can safely proceed @@ -88,7 +89,11 @@ pub async fn query_admin_modify_usertier<'a>( .filter(user::Column::Address.eq(user_address)) .one(&db_conn) .await? - .context("No user with this id found as the change")?; + .ok_or(FrontendErrorResponse::StatusCode( + StatusCode::BAD_REQUEST, + "No user with this id found".to_string(), + None, + ))?; // Return early if the target user_tier_id is the same as the original user_tier_id response_body.insert( "user_tier_title", @@ -96,11 +101,15 @@ pub async fn query_admin_modify_usertier<'a>( ); // Now we can modify the user's tier - let new_user_tier: user_tier::Model = user_tier::Entity::find() + let new_user_tier: user_tier::Model = !user_tier::Entity::find() .filter(user_tier::Column::Title.eq(user_tier_title.clone())) .one(&db_conn) .await? 
- .context("No user tier found with that name")?; + .ok_or(|| FrontendErrorResponse::StatusCode( + StatusCode::BAD_REQUEST, + "User Tier name was not found".to_string(), + None, + ))?; if user.user_tier_id == new_user_tier.id { info!("user already has that tier"); diff --git a/web3_proxy/src/bin/web3_proxy_cli/main.rs b/web3_proxy/src/bin/web3_proxy_cli/main.rs index ff7dd61b..73710f7a 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/main.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/main.rs @@ -70,8 +70,8 @@ pub struct Web3ProxyCli { #[argh(subcommand)] enum SubCommand { ChangeUserAddress(change_user_address::ChangeUserAddressSubCommand), - ChangeUserTier(change_user_tier::ChangeUserTierSubCommand), ChangeUserAdminStatus(change_user_admin_status::ChangeUserAdminStatusSubCommand), + ChangeUserTier(change_user_tier::ChangeUserTierSubCommand), ChangeUserTierByAddress(change_user_tier_by_address::ChangeUserTierByAddressSubCommand), ChangeUserTierByKey(change_user_tier_by_key::ChangeUserTierByKeySubCommand), CheckConfig(check_config::CheckConfigSubCommand), diff --git a/web3_proxy/src/frontend/admin.rs b/web3_proxy/src/frontend/admin.rs index c15a103f..6532d312 100644 --- a/web3_proxy/src/frontend/admin.rs +++ b/web3_proxy/src/frontend/admin.rs @@ -173,8 +173,11 @@ pub async fn admin_login_get( .filter(user::Column::Address.eq(user_address)) .one(db_replica.conn()) .await? - .context("fetching admin from db by user_id")?; - + .ok_or(FrontendErrorResponse::StatusCode( + StatusCode::BAD_REQUEST, + "Could not find user in db".to_string(), + None, + ))?; // Can there be two login-sessions at the same time? // I supposed if the user logs in, the admin would be logged out and vice versa From 7b154d9e8cf826441d9d42253944ca4e0dc7399c Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Fri, 10 Feb 2023 01:06:50 -0800 Subject: [PATCH 11/47] check all the variables --- Jenkinsfile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 47cb0c2b..17085022 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,19 +1,20 @@ def buildAndPush() { + // env.ARCH is the system architecture. some apps can be generic (amd64, arm64), + // but apps that compile for specific hardware (like web3-proxy) will need more specific tags (amd64_epyc2, arm64_graviton2, intel_xeon3, etc.) // env.BRANCH_NAME is set to the git branch name by default // env.REGISTRY is the repository url for this pipeline // env.GIT_SHORT is the git short hash of the currently checked out repo // env.LATEST_BRANCH is the branch name that gets tagged latest - // env.ARCH is the system architecture. some apps can be generic (amd64, arm64), - // but apps that compile for specific hardware (like web3-proxy) will need more specific tags (amd64_epyc2, arm64_graviton2, intel_xeon3, etc.) // TODO: check that this system actually matches the given arch sh '''#!/bin/bash set -eux -o pipefail - [ -n "$GIT_SHORT" ] - [ -n "$GIT_SHORT" ] - [ -n "$REGISTRY" ] [ -n "$ARCH" ] + [ -n "$BRANCH_NAME" ] + [ -n "$REGISTRY" ] + [ -n "$GIT_SHORT" ] + [ -n "$LATEST_BRANCH" ] # deterministic mtime on .git keeps Dockerfiles that do 'ADD . .' 
or similar # without this, the build process always thinks the directory has changes From 77b0c7e3dc4e9642f4698c7adf36495e98f6a0fc Mon Sep 17 00:00:00 2001 From: yenicelik Date: Fri, 10 Feb 2023 17:01:47 +0000 Subject: [PATCH 12/47] merge with old master --- scripts/install-test-suite.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/install-test-suite.sh b/scripts/install-test-suite.sh index 4ff5e09d..75d2304f 100644 --- a/scripts/install-test-suite.sh +++ b/scripts/install-test-suite.sh @@ -1,3 +1,3 @@ cargo install cargo-binstall -cargo install cargo-binstall +cargo binstall cargo-nextest # cargo nextest run \ No newline at end of file From 8562fc3384344f033951c8f96f0e127ed3dc41ec Mon Sep 17 00:00:00 2001 From: yenicelik Date: Fri, 10 Feb 2023 17:48:51 +0000 Subject: [PATCH 13/47] changes from review. will test now --- scripts/apply-migrations.sh | 6 +- scripts/manual-tests/16-change-user-tier.sh | 0 scripts/manual-tests/19-admin-imitate-user.sh | 0 web3_proxy/src/admin_queries.rs | 82 ++++++++++--------- .../change_user_admin_status.rs | 59 +++++++++---- web3_proxy/src/bin/web3_proxy_cli/main.rs | 2 +- web3_proxy/src/frontend/admin.rs | 6 +- web3_proxy/src/frontend/mod.rs | 2 +- 8 files changed, 93 insertions(+), 64 deletions(-) create mode 100644 scripts/manual-tests/16-change-user-tier.sh create mode 100644 scripts/manual-tests/19-admin-imitate-user.sh diff --git a/scripts/apply-migrations.sh b/scripts/apply-migrations.sh index 179ea9a5..3021239b 100644 --- a/scripts/apply-migrations.sh +++ b/scripts/apply-migrations.sh @@ -1,3 +1,3 @@ -sea-orm-cli migrate up - -# sea-orm-cli generate entity -t \ No newline at end of file +# sea-orm-cli migrate up +# sea-orm-cli generate entity -u mysql://root:dev_web3_proxy@127.0.0.1:13306/dev_web3_proxy -o entities/src --with-serde both +# sea-orm-cli generate entity -t \ No newline at end of file diff --git a/scripts/manual-tests/16-change-user-tier.sh b/scripts/manual-tests/16-change-user-tier.sh new file mode 100644 index 00000000..e69de29b diff --git a/scripts/manual-tests/19-admin-imitate-user.sh b/scripts/manual-tests/19-admin-imitate-user.sh new file mode 100644 index 00000000..e69de29b diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs index 83305808..cfe7eb34 100644 --- a/web3_proxy/src/admin_queries.rs +++ b/web3_proxy/src/admin_queries.rs @@ -8,8 +8,10 @@ use axum::{ TypedHeader, }; use axum::response::{IntoResponse, Response}; -use entities::{admin, user, user_tier}; +use entities::{admin, login, user, user_tier}; use ethers::prelude::Address; +use ethers::types::Bytes; +use ethers::utils::keccak256; use hashbrown::HashMap; use http::StatusCode; use migration::sea_orm::{self, ActiveModelTrait, ColumnTrait, EntityTrait, IntoActiveModel, QueryFilter}; @@ -32,29 +34,13 @@ pub async fn query_admin_modify_usertier<'a>( // Quickly return if any of the input tokens are bad let user_address: Vec = params .get("user_address") - .ok_or_else(|| - FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to find user_address key in request".to_string(), - None, - ) - )? + .ok_or_else(|| FrontendErrorResponse::BadRequest("Unable to find user_address key in request".to_string()))? .parse::
() - .map_err(|err| { - FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to parse user_address as an Address".to_string(), - Some(err.into()), - ) - })? + .map_err(|_| FrontendErrorResponse::BadRequest("Unable to parse user_address as an Address".to_string()))? .to_fixed_bytes().into(); let user_tier_title = params .get("user_tier_title") - .ok_or_else(|| FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to get the user_tier_title key from the request".to_string(), - None, - ))?; + .ok_or_else(||FrontendErrorResponse::BadRequest("Unable to get the user_tier_title key from the request".to_string()))?; // Prepare output body let mut response_body = HashMap::new(); @@ -78,22 +64,18 @@ pub async fn query_admin_modify_usertier<'a>( // Check if the caller is an admin (i.e. if he is in an admin table) let admin: admin::Model = admin::Entity::find() .filter(admin::Column::UserId.eq(caller_id)) - .one(&db_conn) + .one(db_replica.conn()) .await? - .ok_or(AccessDenied.into())?; + .ok_or(AccessDenied)?; // If we are here, that means an admin was found, and we can safely proceed // Fetch the admin, and the user let user: user::Model = user::Entity::find() .filter(user::Column::Address.eq(user_address)) - .one(&db_conn) + .one(db_replica.conn()) .await? - .ok_or(FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "No user with this id found".to_string(), - None, - ))?; + .ok_or(FrontendErrorResponse::BadRequest("No user with this id found".to_string()))?; // Return early if the target user_tier_id is the same as the original user_tier_id response_body.insert( "user_tier_title", @@ -101,20 +83,16 @@ pub async fn query_admin_modify_usertier<'a>( ); // Now we can modify the user's tier - let new_user_tier: user_tier::Model = !user_tier::Entity::find() + let new_user_tier: user_tier::Model = user_tier::Entity::find() .filter(user_tier::Column::Title.eq(user_tier_title.clone())) - .one(&db_conn) + .one(db_replica.conn()) .await? - .ok_or(|| FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "User Tier name was not found".to_string(), - None, - ))?; + .ok_or(FrontendErrorResponse::BadRequest("User Tier name was not found".to_string()))?; if user.user_tier_id == new_user_tier.id { info!("user already has that tier"); } else { - let mut user = user.into_active_model(); + let mut user = user.clone().into_active_model(); user.user_tier_id = sea_orm::Set(new_user_tier.id); @@ -123,11 +101,35 @@ pub async fn query_admin_modify_usertier<'a>( info!("user's tier changed"); } - // Finally, remove the user from redis - // TODO: Also remove the user from the redis - // redis_conn.zrem(); - // redis_conn.get::<_, u64>(&user.) // TODO: Where do i find the bearer token ... + // Query the login table, and get all bearer tokens by this user + let bearer_tokens = login::Entity::find() + .filter(login::Column::UserId.eq(user.id)) + .all(db_replica.conn()) + .await?; + // TODO: Remove from Redis + // Remove multiple items simultaneously, but this should be quick let's not prematurely optimize + let recent_user_id_key = format!("recent_users:id:{}", app.config.chain_id); + let salt = app + .config + .public_recent_ips_salt + .as_ref() + .expect("public_recent_ips_salt must exist in here"); + + // TODO: How do I remove the redis items (?) 
+ for bearer_token in bearer_tokens { + let salted_user_id = format!("{}:{}", salt, bearer_token.user_id); + let hashed_user_id = Bytes::from(keccak256(salted_user_id.as_bytes())); + redis_conn + .zrem(&recent_user_id_key, hashed_user_id.to_string()) + .await?; + } + + // Now delete these tokens ... + login::Entity::delete_many() + .filter(login::Column::UserId.eq(user.id)) + .exec(&db_conn) + .await?; Ok(Json(&response_body).into_response()) diff --git a/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs b/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs index 1be5ea86..1ee46659 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs @@ -1,7 +1,9 @@ use anyhow::Context; use argh::FromArgs; use entities::{admin, login, user}; -use ethers::types::Address; +use ethers::types::{Address, Bytes}; +use ethers::utils::keccak256; +use http::StatusCode; use log::{debug, info}; use migration::sea_orm::{ self, ActiveModelTrait, ColumnTrait, DatabaseConnection, EntityTrait, ModelTrait, IntoActiveModel, @@ -34,17 +36,19 @@ impl ChangeUserAdminStatusSubCommand { .filter(user::Column::Address.eq(address.clone())) .one(db_conn) .await? - .context("No user found with that address")?; - - // Check if there is a record in the database - let mut admin = admin::Entity::find() - .filter(admin::Column::UserId.eq(address)) - .all(db_conn) - .await?; + .context(format!("No user with this id found {:?}", address))?; debug!("user: {:#?}", user); - match admin.pop() { + // Check if there is a record in the database + match admin::Entity::find() + .filter(admin::Column::UserId.eq(address)) + .one(db_conn) + .await? { + Some(old_admin) if !should_be_admin => { + // User is already an admin, but shouldn't be + old_admin.delete(db_conn).await?; + } None if should_be_admin => { // User is not an admin yet, but should be let new_admin = admin::ActiveModel { @@ -52,19 +56,42 @@ impl ChangeUserAdminStatusSubCommand { ..Default::default() }; new_admin.insert(db_conn).await?; - }, - Some(old_admin) if !should_be_admin => { - // User is already an admin, but shouldn't be - old_admin.delete(db_conn).await?; - }, - _ => {} + } + _ => { + // Do nothing in this case + debug!("no change needed for: {:#?}", user); + // Early return + return Ok(()); + } } + // Get the bearer tokens of this user and delete them ... + let bearer_tokens = login::Entity::find() + .filter(login::Column::UserId.eq(user.id)) + .all(db_conn) + .await?; + + // // TODO: Remove from Redis + // // Remove multiple items simultaneously, but this should be quick let's not prematurely optimize + // let recent_user_id_key = format!("recent_users:id:{}", app.config.chain_id); + // let salt = app + // .config + // .public_recent_ips_salt + // .as_ref() + // .expect("public_recent_ips_salt must exist in here"); + // + // // TODO: Also clear redis ... + // let salted_user_id = format!("{}:{}", salt, bearer_token.user_id); + // let hashed_user_id = Bytes::from(keccak256(salted_user_id.as_bytes())); + // redis_conn + // .zrem(&recent_user_id_key, hashed_user_id.to_string()) + // .await?; + // Remove any user logins from the database (incl. 
bearer tokens) let delete_result = login::Entity::delete_many() .filter(login::Column::UserId.eq(user.id)) .exec(db_conn) - .await; + .await?; debug!("cleared modified logins: {:?}", delete_result); diff --git a/web3_proxy/src/bin/web3_proxy_cli/main.rs b/web3_proxy/src/bin/web3_proxy_cli/main.rs index 19b608cf..41ce5ad8 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/main.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/main.rs @@ -309,7 +309,7 @@ fn main() -> anyhow::Result<()> { SubCommand::ChangeUserTierByAddress(x) => { let db_url = cli_config .db_url - .expect("'--config' (with a db) or '--db-url' is required to run proxyd"); + .expect("'--config' (with a db) or '--db-url' is required to run change_user_admin_status"); let db_conn = get_db(db_url, 1, 1).await?; diff --git a/web3_proxy/src/frontend/admin.rs b/web3_proxy/src/frontend/admin.rs index 6532d312..8ef18785 100644 --- a/web3_proxy/src/frontend/admin.rs +++ b/web3_proxy/src/frontend/admin.rs @@ -17,7 +17,7 @@ use axum::{ response::IntoResponse, Extension, Json, TypedHeader, }; -use axum_client_ip::ClientIp; +use axum_client_ip::InsecureClientIp; use axum_macros::debug_handler; use chrono::{TimeZone, Utc}; use entities::sea_orm_active_enums::{LogLevel, Role}; @@ -67,7 +67,7 @@ pub async fn admin_change_user_roles( #[debug_handler] pub async fn admin_login_get( Extension(app): Extension>, - ClientIp(ip): ClientIp, + InsecureClientIp(ip): InsecureClientIp, Path(mut params): Path>, ) -> FrontendResult { // First check if the login is authorized @@ -229,7 +229,7 @@ pub async fn admin_login_get( #[debug_handler] pub async fn admin_login_post( Extension(app): Extension>, - ClientIp(ip): ClientIp, + InsecureClientIp(ip): InsecureClientIp, Query(query): Query, Json(payload): Json, ) -> FrontendResult { diff --git a/web3_proxy/src/frontend/mod.rs b/web3_proxy/src/frontend/mod.rs index 3ed6d163..2ca61893 100644 --- a/web3_proxy/src/frontend/mod.rs +++ b/web3_proxy/src/frontend/mod.rs @@ -171,7 +171,7 @@ pub async fn serve(port: u16, proxy_app: Arc) -> anyhow::Result<() .route("/admin/modify_role", get(admin::admin_change_user_roles)) .route("/admin/imitate-login/:user_address", get(admin::admin_login_get)) .route( - "/user/imitate-login/:user_address/:message_eip", + "/admin/imitate-login/:user_address/:message_eip", get(admin::admin_login_get), ) .route("/admin/imitate-login", post(admin::admin_login_post)) From c95911098652b8ddcfae14d71f55db06e054eeea Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Fri, 10 Feb 2023 20:24:20 -0800 Subject: [PATCH 14/47] better sorting of connections --- TODO.md | 5 +++++ web3_proxy/src/app/mod.rs | 4 +++- web3_proxy/src/rpcs/many.rs | 33 +++++++++++++++++---------------- web3_proxy/src/rpcs/one.rs | 8 ++++---- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/TODO.md b/TODO.md index 207567c4..7b46c0e8 100644 --- a/TODO.md +++ b/TODO.md @@ -339,6 +339,10 @@ These are not yet ordered. There might be duplicates. We might not actually need - [-] add configurable size limits to all the Caches - instead of configuring each cache with MB sizes, have one value for total memory footprint and then percentages for each cache - https://github.com/moka-rs/moka/issues/201 +- [ ] have multiple providers on each backend rpc. one websocket for newHeads. and then http providers for handling requests + - erigon only streams the JSON over HTTP. that code isn't enabled for websockets. 
so this should save memory on the erigon servers + - i think this also means we don't need to worry about changing the id that the user gives us. + - have the healthcheck get the block over http. if it errors, or doesn't match what the websocket says, something is wrong (likely a deadlock in the websocket code) - [ ] have private transactions be enabled by a url setting rather than a setting on the key - [ ] cli for adding rpc keys to an existing user - [ ] rate limiting/throttling on query_user_stats @@ -349,6 +353,7 @@ These are not yet ordered. There might be duplicates. We might not actually need - if total difficulty is not on the block and we aren't on ETH, fetch the full block instead of just the header - if total difficulty is set and non-zero, use it for consensus instead of just the number - [ ] query_user_stats cache hit rate +- [ ] need debounce on reconnect. websockets are closing on us and then we reconnect twice. locks on ProviderState need more thought - [ ] having the whole block in status is very verbose. trim it down - [ ] `cost estimate` script - sum bytes and number of requests. prompt hosting costs. divide diff --git a/web3_proxy/src/app/mod.rs b/web3_proxy/src/app/mod.rs index 75610ca2..a0805fbe 100644 --- a/web3_proxy/src/app/mod.rs +++ b/web3_proxy/src/app/mod.rs @@ -1222,13 +1222,15 @@ impl Web3ProxyApp { (&self.balanced_rpcs, default_num) }; + let head_block_num = self.balanced_rpcs.head_block_num(); + // try_send_all_upstream_servers puts the request id into the response. no need to do that ourselves here. let mut response = private_rpcs .try_send_all_synced_connections( authorization, &request, Some(request_metadata.clone()), - None, + head_block_num.as_ref(), Level::Trace, num, true, diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index a46e66f6..a2b555b5 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -680,12 +680,11 @@ impl Web3Rpcs { .clone(); // synced connections are all on the same block. sort them by tier with higher soft limits first - synced_conns.sort_by_cached_key(|x| (x.tier, u32::MAX - x.soft_limit)); + synced_conns.sort_by_cached_key(sort_rpcs_by_sync_status); // if there aren't enough synced connections, include more connections let mut all_conns: Vec<_> = self.conns.values().cloned().collect(); - - sort_connections_by_sync_status(&mut all_conns); + all_conns.sort_by_cached_key(sort_rpcs_by_sync_status); for connection in itertools::chain(synced_conns, all_conns) { if max_count == 0 { @@ -1153,19 +1152,21 @@ impl Serialize for Web3Rpcs { } /// sort by block number (descending) and tier (ascending) -fn sort_connections_by_sync_status(rpcs: &mut Vec>) { - rpcs.sort_by_cached_key(|x| { - let reversed_head_block = u64::MAX - - x.head_block - .read() - .as_ref() - .map(|x| x.number().as_u64()) - .unwrap_or(0); +/// TODO: should this be moved into a `impl Web3Rpc`? +/// TODO: take AsRef or something like that? 
We don't need an Arc here +fn sort_rpcs_by_sync_status(x: &Arc) -> (u64, u64, u32) { + let reversed_head_block = u64::MAX + - x.head_block + .read() + .as_ref() + .map(|x| x.number().as_u64()) + .unwrap_or(0); - let tier = x.tier; + let tier = x.tier; - (reversed_head_block, tier) - }); + let request_ewma = x.latency.request_ewma; + + (reversed_head_block, tier, request_ewma) } mod tests { @@ -1208,7 +1209,7 @@ mod tests { .map(|x| SavedBlock::new(Arc::new(x))) .collect(); - let mut rpcs = [ + let mut rpcs: Vec<_> = [ Web3Rpc { name: "a".to_string(), tier: 0, @@ -1250,7 +1251,7 @@ mod tests { .map(Arc::new) .collect(); - sort_connections_by_sync_status(&mut rpcs); + rpcs.sort_by_cached_key(sort_rpcs_by_sync_status); let names_in_sort_order: Vec<_> = rpcs.iter().map(|x| x.name.as_str()).collect(); diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index 05bc0e54..9a01cd80 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -67,13 +67,13 @@ impl ProviderState { pub struct Web3RpcLatencies { /// Traack how far behind the fastest node we are - new_head: Histogram, + pub new_head: Histogram, /// exponentially weighted moving average of how far behind the fastest node we are - new_head_ewma: u32, + pub new_head_ewma: u32, /// Track how long an rpc call takes on average - request: Histogram, + pub request: Histogram, /// exponentially weighted moving average of how far behind the fastest node we are - request_ewma: u32, + pub request_ewma: u32, } impl Default for Web3RpcLatencies { From f1bc00082aebc82550c1e5c24aaea5d0dec584b2 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Fri, 10 Feb 2023 20:45:57 -0800 Subject: [PATCH 15/47] include to_block more places --- TODO.md | 4 ++ web3_proxy/src/app/mod.rs | 7 ++- web3_proxy/src/block_number.rs | 4 +- web3_proxy/src/rpcs/blockchain.rs | 10 +++- web3_proxy/src/rpcs/many.rs | 88 +++++++++++++++++++++++-------- 5 files changed, 87 insertions(+), 26 deletions(-) diff --git a/TODO.md b/TODO.md index 7b46c0e8..10974a82 100644 --- a/TODO.md +++ b/TODO.md @@ -330,6 +330,8 @@ These are not yet ordered. There might be duplicates. We might not actually need - [x] block all admin_ rpc commands - [x] remove the "metered" crate now that we save aggregate queries? - [x] add archive depth to app config +- [x] use from_block and to_block so that eth_getLogs is routed correctly +- [x] improve eth_sendRawTransaction server selection - [-] proxy mode for benchmarking all backends - [-] proxy mode for sending to multiple backends - [-] let users choose a % of reverts to log (or maybe x/second). someone like curve logging all reverts will be a BIG database very quickly @@ -343,6 +345,8 @@ These are not yet ordered. There might be duplicates. We might not actually need - erigon only streams the JSON over HTTP. that code isn't enabled for websockets. so this should save memory on the erigon servers - i think this also means we don't need to worry about changing the id that the user gives us. - have the healthcheck get the block over http. if it errors, or doesn't match what the websocket says, something is wrong (likely a deadlock in the websocket code) +- [ ] maybe we shouldn't route eth_getLogs to syncing nodes. 
serving queries slows down sync significantly + - change the send_best function to only include servers that are at least close to fully synced - [ ] have private transactions be enabled by a url setting rather than a setting on the key - [ ] cli for adding rpc keys to an existing user - [ ] rate limiting/throttling on query_user_stats diff --git a/web3_proxy/src/app/mod.rs b/web3_proxy/src/app/mod.rs index a0805fbe..4a35eb71 100644 --- a/web3_proxy/src/app/mod.rs +++ b/web3_proxy/src/app/mod.rs @@ -1158,6 +1158,7 @@ impl Web3ProxyApp { request, Some(&request_metadata), None, + None, ) .await?; @@ -1231,6 +1232,7 @@ impl Web3ProxyApp { &request, Some(request_metadata.clone()), head_block_num.as_ref(), + None, Level::Trace, num, true, @@ -1527,6 +1529,7 @@ impl Web3ProxyApp { if let Some(cache_key) = cache_key { let from_block_num = cache_key.from_block.as_ref().map(|x| x.number()); + let to_block_num = cache_key.to_block.as_ref().map(|x| x.number()); self.response_cache .try_get_with(cache_key, async move { @@ -1539,6 +1542,7 @@ impl Web3ProxyApp { request, Some(&request_metadata), from_block_num.as_ref(), + to_block_num.as_ref(), ) .await?; @@ -1547,7 +1551,7 @@ impl Web3ProxyApp { // TODO: only cache the inner response // TODO: how are we going to stream this? - // TODO: check response size. if its very large, return it in a custom Error type that bypasses caching + // TODO: check response size. if its very large, return it in a custom Error type that bypasses caching? or will moka do that for us? Ok::<_, anyhow::Error>(response) }) .await @@ -1567,6 +1571,7 @@ impl Web3ProxyApp { request, Some(&request_metadata), None, + None, ) .await? } diff --git a/web3_proxy/src/block_number.rs b/web3_proxy/src/block_number.rs index 33ef7f54..ef256b84 100644 --- a/web3_proxy/src/block_number.rs +++ b/web3_proxy/src/block_number.rs @@ -215,8 +215,8 @@ pub async fn block_needed( }; return Ok(BlockNeeded::CacheRange { - from_block_num: from_block_num, - to_block_num: to_block_num, + from_block_num, + to_block_num, cache_errors: true, }); } diff --git a/web3_proxy/src/rpcs/blockchain.rs b/web3_proxy/src/rpcs/blockchain.rs index ce79d76a..679516a0 100644 --- a/web3_proxy/src/rpcs/blockchain.rs +++ b/web3_proxy/src/rpcs/blockchain.rs @@ -167,7 +167,13 @@ impl Web3Rpcs { // TODO: request_metadata? maybe we should put it in the authorization? // TODO: think more about this wait_for_sync let response = self - .try_send_best_consensus_head_connection(authorization, request, None, None) + .try_send_best_consensus_head_connection( + authorization, + request, + None, + None, + None, + ) .await?; let block = response.result.context("failed fetching block")?; @@ -258,7 +264,7 @@ impl Web3Rpcs { // TODO: request_metadata or authorization? 
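
The rule these new min/max block arguments enforce can be shown in isolation: a server is only eligible for a ranged request (such as eth_getLogs) if it has block data for both ends of the range. A self-contained sketch, assuming the usual semantics that a block data limit counts how many recent blocks a node keeps (with u64::MAX meaning an archive node); `can_serve` is an illustrative stand-in for the per-rpc has_block_data checks, not code from this patch:

    /// Illustrative only: eligibility for a [from_block, to_block] range.
    fn can_serve(block_data_limit: u64, head: u64, from_block: u64, to_block: u64) -> bool {
        let oldest_available = head.saturating_sub(block_data_limit);
        from_block >= oldest_available && to_block <= head
    }

    fn main() {
        // a pruned node keeping ~128 blocks, currently at head 17_000_000:
        assert!(!can_serve(128, 17_000_000, 16_000_000, 16_000_100)); // range starts too far back
        assert!(can_serve(128, 17_000_000, 16_999_900, 17_000_000));  // recent range is fine
        // an archive node (u64::MAX limit) can serve the old range too:
        assert!(can_serve(u64::MAX, 17_000_000, 16_000_000, 16_000_100));
    }
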
// we don't actually set min_block_needed here because all nodes have all blocks let response = self - .try_send_best_consensus_head_connection(authorization, request, None, None) + .try_send_best_consensus_head_connection(authorization, request, None, None, None) .await?; if let Some(err) = response.error { diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index a2b555b5..32cfc8a0 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -411,7 +411,9 @@ impl Web3Rpcs { authorization: &Arc, request_metadata: Option<&Arc>, skip: &[Arc], + // TODO: if we are checking for the consensus head, i don' think we need min_block_needed/max_block_needed min_block_needed: Option<&U64>, + max_block_needed: Option<&U64>, ) -> anyhow::Result { if let Ok(without_backups) = self ._best_consensus_head_connection( @@ -420,6 +422,7 @@ impl Web3Rpcs { request_metadata, skip, min_block_needed, + max_block_needed, ) .await { @@ -435,6 +438,7 @@ impl Web3Rpcs { request_metadata, skip, min_block_needed, + max_block_needed, ) .await } @@ -447,6 +451,7 @@ impl Web3Rpcs { request_metadata: Option<&Arc>, skip: &[Arc], min_block_needed: Option<&U64>, + max_block_needed: Option<&U64>, ) -> anyhow::Result { let usable_rpcs_by_head_num_and_weight: BTreeMap<(Option, u64), Vec>> = { let synced_connections = self.watch_consensus_connections_sender.borrow().clone(); @@ -471,6 +476,13 @@ impl Web3Rpcs { .filter(|x| if allow_backups { true } else { !x.backup }) .filter(|x| !skip.contains(x)) .filter(|x| x.has_block_data(min_block_needed)) + .filter(|x| { + if let Some(max_block_needed) = max_block_needed { + x.has_block_data(max_block_needed) + } else { + true + } + }) .cloned() { let x_head_block = x.head_block.read().clone(); @@ -637,28 +649,42 @@ impl Web3Rpcs { pub async fn all_connections( &self, authorization: &Arc, - block_needed: Option<&U64>, + min_block_needed: Option<&U64>, + max_block_needed: Option<&U64>, max_count: Option, always_include_backups: bool, ) -> Result, Option> { if !always_include_backups { if let Ok(without_backups) = self - ._all_connections(false, authorization, block_needed, max_count) + ._all_connections( + false, + authorization, + min_block_needed, + max_block_needed, + max_count, + ) .await { return Ok(without_backups); } } - self._all_connections(true, authorization, block_needed, max_count) - .await + self._all_connections( + true, + authorization, + min_block_needed, + max_block_needed, + max_count, + ) + .await } async fn _all_connections( &self, allow_backups: bool, authorization: &Arc, - block_needed: Option<&U64>, + min_block_needed: Option<&U64>, + max_block_needed: Option<&U64>, max_count: Option, ) -> Result, Option> { let mut earliest_retry_at = None; @@ -680,11 +706,12 @@ impl Web3Rpcs { .clone(); // synced connections are all on the same block. 
sort them by tier with higher soft limits first - synced_conns.sort_by_cached_key(sort_rpcs_by_sync_status); + synced_conns.sort_by_cached_key(rpc_sync_status_sort_key); // if there aren't enough synced connections, include more connections + // TODO: only do this sorting if the synced_conns isn't enough let mut all_conns: Vec<_> = self.conns.values().cloned().collect(); - all_conns.sort_by_cached_key(sort_rpcs_by_sync_status); + all_conns.sort_by_cached_key(rpc_sync_status_sort_key); for connection in itertools::chain(synced_conns, all_conns) { if max_count == 0 { @@ -701,7 +728,13 @@ impl Web3Rpcs { continue; } - if let Some(block_needed) = block_needed { + if let Some(block_needed) = min_block_needed { + if !connection.has_block_data(block_needed) { + continue; + } + } + + if let Some(block_needed) = max_block_needed { if !connection.has_block_data(block_needed) { continue; } @@ -709,7 +742,7 @@ impl Web3Rpcs { // check rate limits and increment our connection counter match connection - .try_request_handle(authorization, block_needed.is_none()) + .try_request_handle(authorization, min_block_needed.is_none()) .await { Ok(OpenRequestResult::RetryAt(retry_at)) => { @@ -748,6 +781,7 @@ impl Web3Rpcs { request: JsonRpcRequest, request_metadata: Option<&Arc>, min_block_needed: Option<&U64>, + max_block_needed: Option<&U64>, ) -> anyhow::Result { let mut skip_rpcs = vec![]; let mut method_not_available_response = None; @@ -768,6 +802,7 @@ impl Web3Rpcs { request_metadata, &skip_rpcs, min_block_needed, + max_block_needed, ) .await? { @@ -1007,7 +1042,8 @@ impl Web3Rpcs { authorization: &Arc, request: &JsonRpcRequest, request_metadata: Option>, - block_needed: Option<&U64>, + min_block_needed: Option<&U64>, + max_block_needed: Option<&U64>, error_level: Level, max_count: Option, always_include_backups: bool, @@ -1016,7 +1052,8 @@ impl Web3Rpcs { match self .all_connections( authorization, - block_needed, + min_block_needed, + max_block_needed, max_count, always_include_backups, ) @@ -1099,6 +1136,7 @@ impl Web3Rpcs { request: JsonRpcRequest, request_metadata: Option<&Arc>, min_block_needed: Option<&U64>, + max_block_needed: Option<&U64>, ) -> anyhow::Result { match proxy_mode { ProxyMode::Best => { @@ -1107,6 +1145,7 @@ impl Web3Rpcs { request, request_metadata, min_block_needed, + max_block_needed, ) .await } @@ -1154,7 +1193,7 @@ impl Serialize for Web3Rpcs { /// sort by block number (descending) and tier (ascending) /// TODO: should this be moved into a `impl Web3Rpc`? /// TODO: take AsRef or something like that? 
We don't need an Arc here -fn sort_rpcs_by_sync_status(x: &Arc) -> (u64, u64, u32) { +fn rpc_sync_status_sort_key(x: &Arc) -> (u64, u64, u32) { let reversed_head_block = u64::MAX - x.head_block .read() @@ -1251,7 +1290,7 @@ mod tests { .map(Arc::new) .collect(); - rpcs.sort_by_cached_key(sort_rpcs_by_sync_status); + rpcs.sort_by_cached_key(rpc_sync_status_sort_key); let names_in_sort_order: Vec<_> = rpcs.iter().map(|x| x.name.as_str()).collect(); @@ -1395,7 +1434,7 @@ mod tests { // all_backend_connections gives all non-backup servers regardless of sync status assert_eq!( conns - .all_connections(&authorization, None, None, false) + .all_connections(&authorization, None, None, None, false) .await .unwrap() .len(), @@ -1404,7 +1443,7 @@ mod tests { // best_synced_backend_connection requires servers to be synced with the head block let x = conns - .best_consensus_head_connection(&authorization, None, &[], None) + .best_consensus_head_connection(&authorization, None, &[], None, None) .await .unwrap(); @@ -1459,21 +1498,21 @@ mod tests { assert!(matches!( conns - .best_consensus_head_connection(&authorization, None, &[], None) + .best_consensus_head_connection(&authorization, None, &[], None, None) .await, Ok(OpenRequestResult::Handle(_)) )); assert!(matches!( conns - .best_consensus_head_connection(&authorization, None, &[], Some(&0.into())) + .best_consensus_head_connection(&authorization, None, &[], Some(&0.into()), None) .await, Ok(OpenRequestResult::Handle(_)) )); assert!(matches!( conns - .best_consensus_head_connection(&authorization, None, &[], Some(&1.into())) + .best_consensus_head_connection(&authorization, None, &[], Some(&1.into()), None) .await, Ok(OpenRequestResult::Handle(_)) )); @@ -1481,7 +1520,7 @@ mod tests { // future block should not get a handle assert!(matches!( conns - .best_consensus_head_connection(&authorization, None, &[], Some(&2.into())) + .best_consensus_head_connection(&authorization, None, &[], Some(&2.into()), None) .await, Ok(OpenRequestResult::NotReady(true)) )); @@ -1605,8 +1644,15 @@ mod tests { assert_eq!(conns.num_synced_rpcs(), 2); // best_synced_backend_connection requires servers to be synced with the head block + // TODO: test with and without passing the head_block.number? 
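
To make the three-part sort key concrete, a tiny standalone example with made-up head blocks, tiers, and request EWMAs; it reproduces the intended ordering (freshest head first, ties broken by lower tier, then by lower request latency):

    /// Illustrative only: the same (reversed head, tier, request_ewma) key that
    /// rpc_sync_status_sort_key builds, applied to plain tuples.
    fn sort_key(head: u64, tier: u64, request_ewma: u32) -> (u64, u64, u32) {
        (u64::MAX - head, tier, request_ewma)
    }

    fn main() {
        // (name, head block, tier, request_ewma)
        let mut rpcs = vec![("a", 100u64, 0u64, 10u32), ("b", 101, 1, 50), ("c", 101, 0, 99)];

        rpcs.sort_by_cached_key(|&(_, head, tier, ewma)| sort_key(head, tier, ewma));

        let names: Vec<_> = rpcs.iter().map(|x| x.0).collect();

        // "c" and "b" share the freshest head; "c" wins on tier despite its worse latency
        assert_eq!(names, ["c", "b", "a"]);
    }
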
let best_head_server = conns - .best_consensus_head_connection(&authorization, None, &[], Some(&head_block.number())) + .best_consensus_head_connection( + &authorization, + None, + &[], + Some(&head_block.number()), + None, + ) .await; assert!(matches!( @@ -1615,7 +1661,7 @@ mod tests { )); let best_archive_server = conns - .best_consensus_head_connection(&authorization, None, &[], Some(&1.into())) + .best_consensus_head_connection(&authorization, None, &[], Some(&1.into()), None) .await; match best_archive_server { From 4904bc6734aca985f654d3507988f70bd25d9049 Mon Sep 17 00:00:00 2001 From: yenicelik Date: Sat, 11 Feb 2023 14:19:33 +0000 Subject: [PATCH 16/47] will wait for how to login as a third party user --- scripts/manual-tests/16-change-user-tier.sh | 25 +++++++++++++++++ scripts/manual-tests/19-admin-imitate-user.sh | 27 +++++++++++++++++++ web3_proxy/src/admin_queries.rs | 7 ++--- 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/scripts/manual-tests/16-change-user-tier.sh b/scripts/manual-tests/16-change-user-tier.sh index e69de29b..42791bd3 100644 --- a/scripts/manual-tests/16-change-user-tier.sh +++ b/scripts/manual-tests/16-change-user-tier.sh @@ -0,0 +1,25 @@ +# docker-compose up -d +# rm -rf data/ +# sea-orm-cli migrate up + +# Use CLI to create a user +RUSTFLAGS="--cfg tokio_unstable" cargo run create_user --address 0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a +RUSTFLAGS="--cfg tokio_unstable" cargo run change_admin_status 0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a true + +# Run the proxyd instance +# cargo run --release -- proxyd + +# Check if the instance is running +# curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"web3_clientVersion","id":1}' 127.0.0.1:8544 + +# Login as user first +curl -X GET "127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a" +#curl -X POST -H "Content-Type: application/json" --data '{}' 127.0.0.1:8544/user/login +curl -X GET "127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a/" +#curl -X GET "127.0.0.1:8544/admin/modify_role?user_address=0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a&user_tier_title=Unlimited" + + +# Now modify the user role and check this in the database +curl -X GET "127.0.0.1:8544/admin/modify_role?user_address=0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a&user_tier_title=Unlimited" + +# docker-compose down diff --git a/scripts/manual-tests/19-admin-imitate-user.sh b/scripts/manual-tests/19-admin-imitate-user.sh index e69de29b..22777444 100644 --- a/scripts/manual-tests/19-admin-imitate-user.sh +++ b/scripts/manual-tests/19-admin-imitate-user.sh @@ -0,0 +1,27 @@ +# Admin can login as a user ... 
(but again, we must first have logged in +# docker-compose up -d +# rm -rf data/ +# sea-orm-cli migrate up + +RUSTFLAGS="--cfg tokio_unstable" cargo run create_user --address 0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a +RUSTFLAGS="--cfg tokio_unstable" cargo run change_admin_status 0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a true + +# Run the proxyd instance +# cargo run --release -- proxyd + +# Check if the instance is running +# curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"web3_clientVersion","id":1}' 127.0.0.1:8544 + +# Login as user first +curl -X GET "127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a" +#curl -X POST -H "Content-Type: application/json" --data '{}' 127.0.0.1:8544/user/login +curl -X GET "127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a/" + +# Now modify the user role and check this in the database +# Now we can get a bearer-token to imitate the user +curl -X GET "127.0.0.1:8544/admin/imitate-login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a" +#curl -X POST -H "Content-Type: application/json" --data '{}' 127.0.0.1:8544/user/login +curl -X GET "127.0.0.1:8544/admin/imitate-login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a/" + + +# docker-compose down diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs index cfe7eb34..c86e0ba8 100644 --- a/web3_proxy/src/admin_queries.rs +++ b/web3_proxy/src/admin_queries.rs @@ -15,9 +15,8 @@ use ethers::utils::keccak256; use hashbrown::HashMap; use http::StatusCode; use migration::sea_orm::{self, ActiveModelTrait, ColumnTrait, EntityTrait, IntoActiveModel, QueryFilter}; -use log::info; +use log::{info, debug}; use redis_rate_limiter::redis::AsyncCommands; -use crate::frontend::errors::FrontendErrorResponse::AccessDenied; // TODO: Add some logic to check if the operating user is an admin // If he is, return true @@ -61,12 +60,14 @@ pub async fn query_admin_modify_usertier<'a>( // get the user id first. if it is 0, we should use a cache on the app let caller_id = get_user_id_from_params(&mut redis_conn, &db_conn, &db_replica, bearer, ¶ms).await?; + debug!("Caller id is: {:?}", caller_id); + // Check if the caller is an admin (i.e. if he is in an admin table) let admin: admin::Model = admin::Entity::find() .filter(admin::Column::UserId.eq(caller_id)) .one(db_replica.conn()) .await? - .ok_or(AccessDenied)?; + .ok_or(FrontendErrorResponse::AccessDenied)?; // If we are here, that means an admin was found, and we can safely proceed From d2f7ad5e5158f5527f1ab15228c3ac91ee4f5cb9 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Sat, 11 Feb 2023 07:11:20 -0800 Subject: [PATCH 17/47] move curl docs from internal docs to this repo --- docs/curl login.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 docs/curl login.md diff --git a/docs/curl login.md b/docs/curl login.md new file mode 100644 index 00000000..16ec43b7 --- /dev/null +++ b/docs/curl login.md @@ -0,0 +1,10 @@ +# log in with curl + +1. curl http://127.0.0.1:8544/user/login/$ADDRESS +2. Sign the text with a site like https://www.myetherwallet.com/wallet/sign +3. 
POST the signed data: + + curl -X POST http://127.0.0.1:8544/user/login -H 'Content-Type: application/json' -d + '{ "address": "0x9eb9e3dc2543dc9ff4058e2a2da43a855403f1fd", "msg": "0x6c6c616d616e6f6465732e636f6d2077616e747320796f7520746f207369676e20696e207769746820796f757220457468657265756d206163636f756e743a0a3078396562396533646332353433646339464634303538653241324441343341383535343033463166440a0af09fa699f09fa699f09fa699f09fa699f09fa6990a0a5552493a2068747470733a2f2f6c6c616d616e6f6465732e636f6d2f0a56657273696f6e3a20310a436861696e2049443a20310a4e6f6e63653a203031474d37373330375344324448333854454d3957545156454a0a4973737565642041743a20323032322d31322d31345430323a32333a31372e3735333736335a0a45787069726174696f6e2054696d653a20323032322d31322d31345430323a34333a31372e3735333736335a", "sig": "16bac055345279723193737c6c67cf995e821fd7c038d31fd6f671102088c7b85ab4b13069fd2ed02da186cf549530e315d8d042d721bf81289b3ffdbe8cf9ce1c", "version": "3", "signer": "MEW" }' + +4. The response will include a bearer token. Use it with curl ... -H 'Authorization: Bearer $TOKEN' From c008f50943dcfec432a9ad2c5017d8745be66c2b Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Sun, 12 Feb 2023 01:22:53 -0800 Subject: [PATCH 18/47] use http and ws providers in a single config --- Cargo.lock | 2 +- config/example.toml | 28 +- config/minimal.toml | 17 +- web3_proxy/Cargo.toml | 2 +- web3_proxy/src/app/mod.rs | 37 +- web3_proxy/src/bin/web3_proxy_cli/daemon.rs | 28 +- web3_proxy/src/block_number.rs | 6 + web3_proxy/src/config.rs | 46 +- web3_proxy/src/rpcs/blockchain.rs | 3 +- web3_proxy/src/rpcs/grpc_erigon.rs | 0 web3_proxy/src/rpcs/http.rs | 0 web3_proxy/src/rpcs/many.rs | 108 +-- web3_proxy/src/rpcs/one.rs | 769 +++++++++----------- web3_proxy/src/rpcs/provider.rs | 33 +- web3_proxy/src/rpcs/request.rs | 120 ++- web3_proxy/src/rpcs/transactions.rs | 4 +- web3_proxy/src/rpcs/ws.rs | 0 17 files changed, 564 insertions(+), 639 deletions(-) create mode 100644 web3_proxy/src/rpcs/grpc_erigon.rs create mode 100644 web3_proxy/src/rpcs/http.rs create mode 100644 web3_proxy/src/rpcs/ws.rs diff --git a/Cargo.lock b/Cargo.lock index feec357a..0196af87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5761,7 +5761,7 @@ dependencies = [ [[package]] name = "web3_proxy" -version = "0.13.0" +version = "0.13.1" dependencies = [ "anyhow", "argh", diff --git a/config/example.toml b/config/example.toml index e2c9d8b7..8227635f 100644 --- a/config/example.toml +++ b/config/example.toml @@ -52,50 +52,50 @@ response_cache_max_bytes = 10_000_000_000 [balanced_rpcs.ankr] display_name = "Ankr" - url = "https://rpc.ankr.com/eth" + http_url = "https://rpc.ankr.com/eth" soft_limit = 1_000 tier = 0 [balanced_rpcs.cloudflare] display_name = "Cloudflare" - url = "https://cloudflare-eth.com" + http_url = "https://cloudflare-eth.com" soft_limit = 1_000 tier = 1 [balanced_rpcs.blastapi] display_name = "Blast" - url = "https://eth-mainnet.public.blastapi.io" + http_url = "https://eth-mainnet.public.blastapi.io" soft_limit = 1_000 tier = 1 [balanced_rpcs.mycryptoapi] display_name = "MyCrypto" disabled = true - url = "https://api.mycryptoapi.com/eth" + http_url = "https://api.mycryptoapi.com/eth" soft_limit = 1_000 tier = 2 [balanced_rpcs.pokt-v1] display_name = "Pokt #1" - url = "https://eth-mainnet.gateway.pokt.network/v1/5f3453978e354ab992c4da79" + http_url = "https://eth-mainnet.gateway.pokt.network/v1/5f3453978e354ab992c4da79" soft_limit = 500 tier = 2 [balanced_rpcs.pokt] display_name = "Pokt #2" - url = "https://eth-rpc.gateway.pokt.network" + http_url = 
"https://eth-rpc.gateway.pokt.network" soft_limit = 500 tier = 3 [balanced_rpcs.linkpool] display_name = "Linkpool" - url = "https://main-rpc.linkpool.io" + http_url = "https://main-rpc.linkpool.io" soft_limit = 500 tier = 4 [balanced_rpcs.runonflux] display_name = "Run on Flux (light)" - url = "https://ethereumnodelight.app.runonflux.io" + http_url = "https://ethereumnodelight.app.runonflux.io" soft_limit = 1_000 tier = 5 @@ -103,7 +103,7 @@ response_cache_max_bytes = 10_000_000_000 [balanced_rpcs.linkpool-light] display_name = "Linkpool (light)" disabled = true - url = "https://main-light.eth.linkpool.io" + http_url = "https://main-light.eth.linkpool.io" soft_limit = 100 tier = 5 @@ -114,34 +114,34 @@ response_cache_max_bytes = 10_000_000_000 [private_rpcs.eden] disabled = true display_name = "Eden network" - url = "https://api.edennetwork.io/v1/" + http_url = "https://api.edennetwork.io/v1/" soft_limit = 1_805 tier = 0 [private_rpcs.eden_beta] disabled = true display_name = "Eden network beta" - url = "https://api.edennetwork.io/v1/beta" + http_url = "https://api.edennetwork.io/v1/beta" soft_limit = 5_861 tier = 0 [private_rpcs.ethermine] disabled = true display_name = "Ethermine" - url = "https://rpc.ethermine.org" + http_url = "https://rpc.ethermine.org" soft_limit = 5_861 tier = 0 [private_rpcs.flashbots] disabled = true display_name = "Flashbots Fast" - url = "https://rpc.flashbots.net/fast" + http_url = "https://rpc.flashbots.net/fast" soft_limit = 7_074 tier = 0 [private_rpcs.securerpc] disabled = true display_name = "SecureRPC" - url = "https://gibson.securerpc.com/v1" + http_url = "https://gibson.securerpc.com/v1" soft_limit = 4_560 tier = 0 diff --git a/config/minimal.toml b/config/minimal.toml index 2225c9d1..770e3484 100644 --- a/config/minimal.toml +++ b/config/minimal.toml @@ -16,17 +16,26 @@ response_cache_max_bytes = 1_000_000_000 [balanced_rpcs] - [balanced_rpcs.llama_public_wss] + [balanced_rpcs.llama_public_both] # TODO: what should we do if all rpcs are disabled? warn and wait for a config change? disabled = false - display_name = "LlamaNodes WSS" - url = "wss://eth.llamarpc.com/" + display_name = "LlamaNodes Both" + ws_url = "wss://eth.llamarpc.com/" + http_url = "https://eth.llamarpc.com/" soft_limit = 1_000 tier = 0 [balanced_rpcs.llama_public_https] disabled = false display_name = "LlamaNodes HTTPS" - url = "https://eth.llamarpc.com/" + http_url = "https://eth.llamarpc.com/" + soft_limit = 1_000 + tier = 0 + + [balanced_rpcs.llama_public_wss] + # TODO: what should we do if all rpcs are disabled? warn and wait for a config change? 
+ disabled = false + display_name = "LlamaNodes WSS" + ws_url = "wss://eth.llamarpc.com/" soft_limit = 1_000 tier = 0 diff --git a/web3_proxy/Cargo.toml b/web3_proxy/Cargo.toml index d60d162d..9ebe64e5 100644 --- a/web3_proxy/Cargo.toml +++ b/web3_proxy/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "web3_proxy" -version = "0.13.0" +version = "0.13.1" edition = "2021" default-run = "web3_proxy_cli" diff --git a/web3_proxy/src/app/mod.rs b/web3_proxy/src/app/mod.rs index 4a35eb71..91e9c95d 100644 --- a/web3_proxy/src/app/mod.rs +++ b/web3_proxy/src/app/mod.rs @@ -482,7 +482,7 @@ impl Web3ProxyApp { let http_client = Some( reqwest::ClientBuilder::new() .connect_timeout(Duration::from_secs(5)) - .timeout(Duration::from_secs(60)) + .timeout(Duration::from_secs(5 * 60)) .user_agent(APP_USER_AGENT) .build()?, ); @@ -573,17 +573,17 @@ impl Web3ProxyApp { // connect to the load balanced rpcs let (balanced_rpcs, balanced_handle) = Web3Rpcs::spawn( + block_map.clone(), top_config.app.chain_id, db_conn.clone(), - balanced_rpcs, http_client.clone(), - vredis_pool.clone(), - block_map.clone(), - Some(watch_consensus_head_sender), - top_config.app.min_sum_soft_limit, top_config.app.min_synced_rpcs, - Some(pending_tx_sender.clone()), + top_config.app.min_sum_soft_limit, pending_transactions.clone(), + Some(pending_tx_sender.clone()), + vredis_pool.clone(), + balanced_rpcs, + Some(watch_consensus_head_sender), ) .await .context("spawning balanced rpcs")?; @@ -599,21 +599,22 @@ impl Web3ProxyApp { None } else { let (private_rpcs, private_handle) = Web3Rpcs::spawn( + block_map, top_config.app.chain_id, db_conn.clone(), - private_rpcs, http_client.clone(), + 0, + 0, + pending_transactions.clone(), + // TODO: subscribe to pending transactions on the private rpcs? they seem to have low rate limits, but they should have + None, vredis_pool.clone(), - block_map, + private_rpcs, // subscribing to new heads here won't work well. if they are fast, they might be ahead of balanced_rpcs // they also often have low rate limits // however, they are well connected to miners/validators. so maybe using them as a safety check would be good + // TODO: but maybe we could include privates in the "backup" tier None, - 0, - 0, - // TODO: subscribe to pending transactions on the private rpcs? 
they seem to have low rate limits - None, - pending_transactions.clone(), ) .await .context("spawning private_rpcs")?; @@ -1035,9 +1036,17 @@ impl Web3ProxyApp { | "db_getString" | "db_putHex" | "db_putString" + | "debug_accountRange" + | "debug_backtraceAt" + | "debug_blockProfile" | "debug_chaindbCompact" + | "debug_chaindbProperty" + | "debug_cpuProfile" + | "debug_freeOSMemory" | "debug_freezeClient" + | "debug_gcStats" | "debug_goTrace" + | "debug_memStats" | "debug_mutexProfile" | "debug_setBlockProfileRate" | "debug_setGCPercent" diff --git a/web3_proxy/src/bin/web3_proxy_cli/daemon.rs b/web3_proxy/src/bin/web3_proxy_cli/daemon.rs index 62d742e5..cf2f4cf8 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/daemon.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/daemon.rs @@ -205,31 +205,27 @@ mod tests { ( "anvil".to_string(), Web3RpcConfig { - disabled: false, - display_name: None, - url: anvil.endpoint(), - backup: Some(false), - block_data_limit: None, + http_url: Some(anvil.endpoint()), soft_limit: 100, - hard_limit: None, tier: 0, - subscribe_txs: Some(false), - extra: Default::default(), + ..Default::default() }, ), ( "anvil_ws".to_string(), Web3RpcConfig { - disabled: false, - display_name: None, - url: anvil.ws_endpoint(), - backup: Some(false), - block_data_limit: None, + ws_url: Some(anvil.ws_endpoint()), soft_limit: 100, - hard_limit: None, tier: 0, - subscribe_txs: Some(false), - extra: Default::default(), + ..Default::default() + }, + ), + ( + "anvil_both".to_string(), + Web3RpcConfig { + http_url: Some(anvil.endpoint()), + ws_url: Some(anvil.ws_endpoint()), + ..Default::default() }, ), ]), diff --git a/web3_proxy/src/block_number.rs b/web3_proxy/src/block_number.rs index ef256b84..da708286 100644 --- a/web3_proxy/src/block_number.rs +++ b/web3_proxy/src/block_number.rs @@ -132,6 +132,12 @@ pub async fn block_needed( head_block_num: U64, rpcs: &Web3Rpcs, ) -> anyhow::Result { + // some requests have potentially very large responses + // TODO: only skip caching if the response actually is large + if method.starts_with("trace_") || method == "debug_traceTransaction" { + return Ok(BlockNeeded::CacheNever); + } + let params = if let Some(params) = params { // grab the params so we can inspect and potentially modify them params diff --git a/web3_proxy/src/config.rs b/web3_proxy/src/config.rs index 942632e7..2bec1bd0 100644 --- a/web3_proxy/src/config.rs +++ b/web3_proxy/src/config.rs @@ -197,15 +197,19 @@ fn default_response_cache_max_bytes() -> u64 { } /// Configuration for a backend web3 RPC server -#[derive(Clone, Debug, Deserialize)] +#[derive(Clone, Debug, Default, Deserialize)] pub struct Web3RpcConfig { /// simple way to disable a connection without deleting the row #[serde(default)] pub disabled: bool, /// a name used in /status and other user facing messages pub display_name: Option, - /// websocket (or http if no websocket) - pub url: String, + /// (deprecated) rpc url + pub url: Option, + /// while not absolutely required, a ws:// or wss:// connection will be able to subscribe to head blocks + pub ws_url: Option, + /// while not absolutely required, a http:// or https:// connection will allow erigon to stream JSON + pub http_url: Option, /// block data limit. 
If None, will be queried pub block_data_limit: Option, /// the requests per second at which the server starts slowing down @@ -213,14 +217,15 @@ pub struct Web3RpcConfig { /// the requests per second at which the server throws errors (rate limit or otherwise) pub hard_limit: Option, /// only use this rpc if everything else is lagging too far. this allows us to ignore fast but very low limit rpcs - pub backup: Option, + #[serde(default)] + pub backup: bool, /// All else equal, a server with a lower tier receives all requests #[serde(default = "default_tier")] pub tier: u64, /// Subscribe to the firehose of pending transactions /// Don't do this with free rpcs #[serde(default)] - pub subscribe_txs: Option, + pub subscribe_txs: bool, /// unknown config options get put here #[serde(flatten, default = "HashMap::default")] pub extra: HashMap, @@ -245,47 +250,24 @@ impl Web3RpcConfig { block_map: BlockHashesCache, block_sender: Option>, tx_id_sender: Option>, + reconnect: bool, ) -> anyhow::Result<(Arc, AnyhowJoinHandle<()>)> { if !self.extra.is_empty() { warn!("unknown Web3RpcConfig fields!: {:?}", self.extra.keys()); } - let hard_limit = match (self.hard_limit, redis_pool) { - (None, None) => None, - (Some(hard_limit), Some(redis_client_pool)) => Some((hard_limit, redis_client_pool)), - (None, Some(_)) => None, - (Some(_hard_limit), None) => { - return Err(anyhow::anyhow!( - "no redis client pool! needed for hard limit" - )) - } - }; - - let tx_id_sender = if self.subscribe_txs.unwrap_or(false) { - tx_id_sender - } else { - None - }; - - let backup = self.backup.unwrap_or(false); - Web3Rpc::spawn( + self, name, - self.display_name, chain_id, db_conn, - self.url, http_client, http_interval_sender, - hard_limit, - self.soft_limit, - backup, - self.block_data_limit, + redis_pool, block_map, block_sender, tx_id_sender, - true, - self.tier, + reconnect, ) .await } diff --git a/web3_proxy/src/rpcs/blockchain.rs b/web3_proxy/src/rpcs/blockchain.rs index 679516a0..b70663f1 100644 --- a/web3_proxy/src/rpcs/blockchain.rs +++ b/web3_proxy/src/rpcs/blockchain.rs @@ -149,12 +149,13 @@ impl Web3Rpcs { // TODO: if error, retry? let block: ArcBlock = match rpc { Some(rpc) => rpc - .wait_for_request_handle(authorization, Some(Duration::from_secs(30)), false) + .wait_for_request_handle(authorization, Some(Duration::from_secs(30)), None) .await? .request::<_, Option<_>>( "eth_getBlockByHash", &json!(get_block_params), Level::Error.into(), + None, ) .await? .context("no block!")?, diff --git a/web3_proxy/src/rpcs/grpc_erigon.rs b/web3_proxy/src/rpcs/grpc_erigon.rs new file mode 100644 index 00000000..e69de29b diff --git a/web3_proxy/src/rpcs/http.rs b/web3_proxy/src/rpcs/http.rs new file mode 100644 index 00000000..e69de29b diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index 32cfc8a0..086ce81b 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -56,17 +56,17 @@ impl Web3Rpcs { /// Spawn durable connections to multiple Web3 providers. 
#[allow(clippy::too_many_arguments)] pub async fn spawn( + block_map: BlockHashesCache, chain_id: u64, db_conn: Option, - server_configs: HashMap, http_client: Option, - redis_pool: Option, - block_map: BlockHashesCache, - watch_consensus_head_sender: Option>, - min_sum_soft_limit: u32, min_head_rpcs: usize, - pending_tx_sender: Option>, + min_sum_soft_limit: u32, pending_transactions: Cache, + pending_tx_sender: Option>, + redis_pool: Option, + server_configs: HashMap, + watch_consensus_head_sender: Option>, ) -> anyhow::Result<(Arc, AnyhowJoinHandle<()>)> { let (pending_tx_id_sender, pending_tx_id_receiver) = flume::unbounded(); let (block_sender, block_receiver) = flume::unbounded::(); @@ -160,6 +160,7 @@ impl Web3Rpcs { block_map, block_sender, pending_tx_id_sender, + true, ) .await }); @@ -343,7 +344,7 @@ impl Web3Rpcs { .into_iter() .map(|active_request_handle| async move { let result: Result, _> = active_request_handle - .request(method, &json!(¶ms), error_level.into()) + .request(method, &json!(¶ms), error_level.into(), None) .await; result }) @@ -473,12 +474,20 @@ impl Web3Rpcs { for x in self .conns .values() - .filter(|x| if allow_backups { true } else { !x.backup }) - .filter(|x| !skip.contains(x)) - .filter(|x| x.has_block_data(min_block_needed)) .filter(|x| { - if let Some(max_block_needed) = max_block_needed { - x.has_block_data(max_block_needed) + if !allow_backups && x.backup { + false + } else if skip.contains(x) { + false + } else if !x.has_block_data(min_block_needed) { + false + } else if max_block_needed + .and_then(|max_block_needed| { + Some(!x.has_block_data(max_block_needed)) + }) + .unwrap_or(false) + { + false } else { true } @@ -521,58 +530,22 @@ impl Web3Rpcs { let mut earliest_retry_at = None; for usable_rpcs in usable_rpcs_by_head_num_and_weight.into_values().rev() { - // under heavy load, it is possible for even our best server to be negative - let mut minimum = f64::MAX; - let mut maximum = f64::MIN; - // we sort on a combination of values. cache them here so that we don't do this math multiple times. - let mut available_request_map: HashMap<_, f64> = usable_rpcs + // TODO: is this necessary if we use sort_by_cached_key? + let available_request_map: HashMap<_, f64> = usable_rpcs .iter() .map(|rpc| { - // TODO: are active requests what we want? do we want a counter for requests in the last second + any actives longer than that? - // TODO: get active requests out of redis (that's definitely too slow) - // TODO: do something with hard limit instead? (but that is hitting redis too much) - let active_requests = rpc.active_requests() as f64; - let soft_limit = rpc.soft_limit as f64; - - let available_requests = soft_limit - active_requests; - - // trace!("available requests on {}: {}", rpc, available_requests); - - minimum = minimum.min(available_requests); - maximum = maximum.max(available_requests); - - (rpc, available_requests) + // TODO: weighted sort by remaining hard limit? + // TODO: weighted sort by soft_limit - ewma_active_requests? that assumes soft limits are any good + (rpc, 1.0) }) .collect(); - // trace!("minimum available requests: {}", minimum); - // trace!("maximum available requests: {}", maximum); - - if maximum < 0.0 { - // TODO: if maximum < 0 and there are other tiers on the same block, we should include them now - warn!("soft limits overloaded: {} to {}", minimum, maximum) - } - - // choose_multiple_weighted can't have negative numbers. shift up if any are negative - // TODO: is this a correct way to shift? 
- if minimum < 0.0 { - available_request_map = available_request_map - .into_iter() - .map(|(rpc, available_requests)| { - // TODO: is simple addition the right way to shift everyone? - // TODO: probably want something non-linear - // minimum is negative, so we subtract to make available requests bigger - let x = available_requests - minimum; - - (rpc, x) - }) - .collect() - } + debug!("todo: better sort here"); let sorted_rpcs = { if usable_rpcs.len() == 1 { - // TODO: return now instead? we shouldn't need another alloc + // TODO: try the next tier vec![usable_rpcs.get(0).expect("there should be 1")] } else { let mut rng = thread_fast_rng::thread_fast_rng(); @@ -589,12 +562,10 @@ impl Web3Rpcs { }; // now that the rpcs are sorted, try to get an active request handle for one of them + // TODO: pick two randomly and choose the one with the lower rpc.latency.ewma for best_rpc in sorted_rpcs.into_iter() { // increment our connection counter - match best_rpc - .try_request_handle(authorization, min_block_needed.is_none()) - .await - { + match best_rpc.try_request_handle(authorization, None).await { Ok(OpenRequestResult::Handle(handle)) => { // trace!("opened handle: {}", best_rpc); return Ok(OpenRequestResult::Handle(handle)); @@ -741,10 +712,7 @@ impl Web3Rpcs { } // check rate limits and increment our connection counter - match connection - .try_request_handle(authorization, min_block_needed.is_none()) - .await - { + match connection.try_request_handle(authorization, None).await { Ok(OpenRequestResult::RetryAt(retry_at)) => { // this rpc is not available. skip it earliest_retry_at = earliest_retry_at.min(Some(retry_at)); @@ -827,6 +795,7 @@ impl Web3Rpcs { &request.method, &json!(request.params), RequestRevertHandler::Save, + None, ) .await; @@ -1214,7 +1183,6 @@ mod tests { use super::*; use crate::rpcs::{ blockchain::{ConsensusFinder, SavedBlock}, - one::ProviderState, provider::Web3Provider, }; use ethers::types::{Block, U256}; @@ -1338,9 +1306,6 @@ mod tests { let head_rpc = Web3Rpc { name: "synced".to_string(), - provider_state: AsyncRwLock::new(ProviderState::Connected(Arc::new( - Web3Provider::Mock, - ))), soft_limit: 1_000, automatic_block_limit: false, backup: false, @@ -1352,9 +1317,6 @@ mod tests { let lagged_rpc = Web3Rpc { name: "lagged".to_string(), - provider_state: AsyncRwLock::new(ProviderState::Connected(Arc::new( - Web3Provider::Mock, - ))), soft_limit: 1_000, automatic_block_limit: false, backup: false, @@ -1553,9 +1515,6 @@ mod tests { let pruned_rpc = Web3Rpc { name: "pruned".to_string(), - provider_state: AsyncRwLock::new(ProviderState::Connected(Arc::new( - Web3Provider::Mock, - ))), soft_limit: 3_000, automatic_block_limit: false, backup: false, @@ -1567,9 +1526,6 @@ mod tests { let archive_rpc = Web3Rpc { name: "archive".to_string(), - provider_state: AsyncRwLock::new(ProviderState::Connected(Arc::new( - Web3Provider::Mock, - ))), soft_limit: 1_000, automatic_block_limit: false, backup: false, diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index 9a01cd80..c1db0ad5 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -3,9 +3,9 @@ use super::blockchain::{ArcBlock, BlockHashesCache, SavedBlock}; use super::provider::Web3Provider; use super::request::{OpenRequestHandle, OpenRequestResult}; use crate::app::{flatten_handle, AnyhowJoinHandle}; -use crate::config::BlockAndRpc; +use crate::config::{BlockAndRpc, Web3RpcConfig}; use crate::frontend::authorization::Authorization; -use anyhow::Context; +use anyhow::{anyhow, Context}; 
use ethers::prelude::{Bytes, Middleware, ProviderError, TxHash, H256, U64}; use ethers::types::U256; use futures::future::try_join_all; @@ -21,50 +21,13 @@ use serde_json::json; use std::cmp::min; use std::fmt; use std::hash::{Hash, Hasher}; -use std::sync::atomic::{self, AtomicU32, AtomicU64}; +use std::sync::atomic::{self, AtomicU64}; use std::{cmp::Ordering, sync::Arc}; use thread_fast_rng::rand::Rng; use thread_fast_rng::thread_fast_rng; use tokio::sync::{broadcast, oneshot, watch, RwLock as AsyncRwLock}; use tokio::time::{sleep, sleep_until, timeout, Duration, Instant}; -// TODO: maybe provider state should have the block data limit in it. but it is inside an async lock and we can't Serialize then -#[derive(Clone, Debug)] -pub enum ProviderState { - None, - Connecting(Arc), - Connected(Arc), -} - -impl Default for ProviderState { - fn default() -> Self { - Self::None - } -} - -impl ProviderState { - pub async fn provider(&self, allow_not_ready: bool) -> Option<&Arc> { - match self { - ProviderState::None => None, - ProviderState::Connecting(x) => { - if allow_not_ready { - Some(x) - } else { - // TODO: do a ready check here? - None - } - } - ProviderState::Connected(x) => { - if x.ready() { - Some(x) - } else { - None - } - } - } - } -} - pub struct Web3RpcLatencies { /// Traack how far behind the fastest node we are pub new_head: Histogram, @@ -93,19 +56,15 @@ pub struct Web3Rpc { pub name: String, pub display_name: Option, pub db_conn: Option, - /// TODO: can we get this from the provider? do we even need it? - pub(super) url: String, + pub(super) ws_url: Option, + pub(super) http_url: Option, /// Some connections use an http_client. we keep a clone for reconnecting pub(super) http_client: Option, - /// keep track of currently open requests. We sort on this - pub(super) active_requests: AtomicU32, - /// keep track of total requests from the frontend - pub(super) frontend_requests: AtomicU64, - /// keep track of total requests from web3-proxy itself - pub(super) internal_requests: AtomicU64, /// provider is in a RwLock so that we can replace it if re-connecting /// it is an async lock because we hold it open across awaits - pub(super) provider_state: AsyncRwLock, + /// this provider is only used for new heads subscriptions + /// TODO: put the provider inside an arc? + pub(super) new_head_client: AsyncRwLock>>, /// keep track of hard limits pub(super) hard_limit_until: Option>, /// rate limits are stored in a central redis so that multiple proxies can share their rate limits @@ -121,7 +80,7 @@ pub struct Web3Rpc { pub(super) block_data_limit: AtomicU64, /// Lower tiers are higher priority when sending requests pub(super) tier: u64, - /// TODO: change this to a watch channel so that http providers can subscribe and take action on change + /// TODO: change this to a watch channel so that http providers can subscribe and take action on change. pub(super) head_block: RwLock>, /// Track how fast this RPC is pub(super) latency: Web3RpcLatencies, @@ -132,39 +91,54 @@ impl Web3Rpc { // TODO: have this take a builder (which will have channels attached). or maybe just take the config and give the config public fields #[allow(clippy::too_many_arguments)] pub async fn spawn( + mut config: Web3RpcConfig, name: String, - display_name: Option, chain_id: u64, db_conn: Option, - url_str: String, // optional because this is only used for http providers. websocket providers don't use it http_client: Option, + // TODO: rename to http_new_head_interval_sender? 
http_interval_sender: Option>>, - // TODO: have a builder struct for this. - hard_limit: Option<(u64, RedisPool)>, - // TODO: think more about this type - soft_limit: u32, - backup: bool, - block_data_limit: Option, + redis_pool: Option, + // TODO: think more about soft limit. watching ewma of requests is probably better. but what should the random sort be on? maybe group on tier is enough + // soft_limit: u32, block_map: BlockHashesCache, block_sender: Option>, tx_id_sender: Option)>>, reconnect: bool, - tier: u64, ) -> anyhow::Result<(Arc, AnyhowJoinHandle<()>)> { - let hard_limit = hard_limit.map(|(hard_rate_limit, redis_pool)| { - // TODO: is cache size 1 okay? i think we need - RedisRateLimiter::new( - "web3_proxy", - &format!("{}:{}", chain_id, name), - hard_rate_limit, - 60.0, - redis_pool, - ) - }); + let hard_limit = match (config.hard_limit, redis_pool) { + (None, None) => None, + (Some(hard_limit), Some(redis_pool)) => { + // TODO: in process rate limiter instead? or is deffered good enough? + let rrl = RedisRateLimiter::new( + "web3_proxy", + &format!("{}:{}", chain_id, name), + hard_limit, + 60.0, + redis_pool, + ); - // TODO: should we do this even if block_sender is None? then we would know limits on private relays - let block_data_limit: AtomicU64 = block_data_limit.unwrap_or_default().into(); + Some(rrl) + } + (None, Some(_)) => None, + (Some(_hard_limit), None) => { + return Err(anyhow::anyhow!( + "no redis client pool! needed for hard limit" + )) + } + }; + + let tx_id_sender = if config.subscribe_txs { + // TODO: warn if tx_id_sender is None? + tx_id_sender + } else { + None + }; + + let backup = config.backup; + + let block_data_limit: AtomicU64 = config.block_data_limit.unwrap_or_default().into(); let automatic_block_limit = (block_data_limit.load(atomic::Ordering::Acquire) == 0) && block_sender.is_some(); @@ -178,19 +152,36 @@ impl Web3Rpc { None }; + if config.ws_url.is_none() && config.http_url.is_none() { + if let Some(url) = config.url { + if url.starts_with("ws") { + config.ws_url = Some(url); + } else if url.starts_with("http") { + config.http_url = Some(url); + } else { + return Err(anyhow!("only ws or http urls are supported")); + } + } else { + return Err(anyhow!( + "either ws_url or http_url are required. it is best to set both" + )); + } + } + let new_connection = Self { name, db_conn: db_conn.clone(), - display_name, + display_name: config.display_name, http_client, - url: url_str, + ws_url: config.ws_url, + http_url: config.http_url, hard_limit, hard_limit_until, - soft_limit, + soft_limit: config.soft_limit, automatic_block_limit, backup, block_data_limit, - tier, + tier: config.tier, ..Default::default() }; @@ -224,6 +215,7 @@ impl Web3Rpc { async fn check_block_data_limit( self: &Arc, authorization: &Arc, + unlocked_provider: Option>, ) -> anyhow::Result> { if !self.automatic_block_limit { // TODO: is this a good thing to return? @@ -238,7 +230,7 @@ impl Web3Rpc { // TODO: start at 0 or 1? 
for block_data_limit in [0, 32, 64, 128, 256, 512, 1024, 90_000, u64::MAX] { let handle = self - .wait_for_request_handle(authorization, None, true) + .wait_for_request_handle(authorization, None, unlocked_provider.clone()) .await?; let head_block_num_future = handle.request::, U256>( @@ -246,6 +238,7 @@ impl Web3Rpc { &None, // error here are expected, so keep the level low Level::Debug.into(), + unlocked_provider.clone(), ); let head_block_num = timeout(Duration::from_secs(5), head_block_num_future) @@ -264,7 +257,7 @@ impl Web3Rpc { // TODO: wait for the handle BEFORE we check the current block number. it might be delayed too! // TODO: what should the request be? let handle = self - .wait_for_request_handle(authorization, None, true) + .wait_for_request_handle(authorization, None, unlocked_provider.clone()) .await?; let archive_result: Result = handle @@ -276,6 +269,7 @@ impl Web3Rpc { )), // error here are expected, so keep the level low Level::Trace.into(), + unlocked_provider.clone(), ) .await; @@ -403,119 +397,111 @@ impl Web3Rpc { chain_id: u64, db_conn: Option<&DatabaseConnection>, ) -> anyhow::Result<()> { - // trace!("provider_state {} locking...", self); - let mut provider_state = self - .provider_state - .try_write() - .context("locking provider for write")?; - // trace!("provider_state {} locked: {:?}", self, provider_state); - - match &*provider_state { - ProviderState::None => { - info!("connecting to {}", self); + if let Ok(mut unlocked_provider) = self.new_head_client.try_write() { + #[cfg(test)] + if let Some(Web3Provider::Mock) = unlocked_provider.as_deref() { + return Ok(()); } - ProviderState::Connecting(provider) | ProviderState::Connected(provider) => { - // disconnect the current provider - if let Web3Provider::Mock = provider.as_ref() { - return Ok(()); + + *unlocked_provider = if let Some(ws_url) = self.ws_url.as_ref() { + // set up ws client + match &*unlocked_provider { + None => { + info!("connecting to {}", self); + } + Some(_) => { + debug!("reconnecting to {}", self); + + // tell the block subscriber that this rpc doesn't have any blocks + if let Some(block_sender) = block_sender { + block_sender + .send_async((None, self.clone())) + .await + .context("block_sender during connect")?; + } + + // reset sync status + let mut head_block = self.head_block.write(); + *head_block = None; + + // disconnect the current provider + // TODO: what until the block_sender's receiver finishes updating this item? 
+ *unlocked_provider = None; + } } - debug!("reconnecting to {}", self); + let p = Web3Provider::from_str(ws_url.as_str(), None) + .await + .context(format!("failed connecting to {}", ws_url))?; - // disconnect the current provider - *provider_state = ProviderState::None; + assert!(p.ws().is_some()); - // reset sync status - // trace!("locking head block on {}", self); - { - let mut head_block = self.head_block.write(); - *head_block = None; - } - // trace!("done with head block on {}", self); - - // tell the block subscriber that we don't have any blocks - if let Some(block_sender) = block_sender { - block_sender - .send_async((None, self.clone())) + Some(Arc::new(p)) + } else { + // http client + if let Some(url) = &self.http_url { + let p = Web3Provider::from_str(url, self.http_client.clone()) .await - .context("block_sender during connect")?; + .context(format!("failed connecting to {}", url))?; + + assert!(p.http().is_some()); + + Some(Arc::new(p)) + } else { + None + } + }; + + let authorization = Arc::new(Authorization::internal(db_conn.cloned())?); + + // check the server's chain_id here + // TODO: some public rpcs (on bsc and fantom) do not return an id and so this ends up being an error + // TODO: what should the timeout be? should there be a request timeout? + // trace!("waiting on chain id for {}", self); + let found_chain_id: Result = self + .wait_for_request_handle(&authorization, None, unlocked_provider.clone()) + .await? + .request( + "eth_chainId", + &json!(Option::None::<()>), + Level::Trace.into(), + unlocked_provider.clone(), + ) + .await; + // trace!("found_chain_id: {:?}", found_chain_id); + + match found_chain_id { + Ok(found_chain_id) => { + // TODO: there has to be a cleaner way to do this + if chain_id != found_chain_id.as_u64() { + return Err(anyhow::anyhow!( + "incorrect chain id! Config has {}, but RPC has {}", + chain_id, + found_chain_id + ) + .context(format!("failed @ {}", self))); + } + } + Err(e) => { + return Err(anyhow::Error::from(e)); } } - } - // trace!("Creating new Web3Provider on {}", self); - // TODO: if this fails, keep retrying! otherwise it crashes and doesn't try again! - let new_provider = Web3Provider::from_str(&self.url, self.http_client.clone()).await?; + self.check_block_data_limit(&authorization, unlocked_provider.clone()) + .await?; - // trace!("saving provider state as NotReady on {}", self); - *provider_state = ProviderState::Connecting(Arc::new(new_provider)); + drop(unlocked_provider); - // drop the lock so that we can get a request handle - // trace!("provider_state {} unlocked", self); - drop(provider_state); - - let authorization = Arc::new(Authorization::internal(db_conn.cloned())?); - - // check the server's chain_id here - // TODO: some public rpcs (on bsc and fantom) do not return an id and so this ends up being an error - // TODO: what should the timeout be? should there be a request timeout? - // trace!("waiting on chain id for {}", self); - let found_chain_id: Result = self - .wait_for_request_handle(&authorization, None, true) - .await? - .request( - "eth_chainId", - &json!(Option::None::<()>), - Level::Trace.into(), - ) - .await; - // trace!("found_chain_id: {:?}", found_chain_id); - - match found_chain_id { - Ok(found_chain_id) => { - // TODO: there has to be a cleaner way to do this - if chain_id != found_chain_id.as_u64() { - return Err(anyhow::anyhow!( - "incorrect chain id! 
Config has {}, but RPC has {}", - chain_id, - found_chain_id - ) - .context(format!("failed @ {}", self))); - } + info!("successfully connected to {}", self); + } else { + if self.new_head_client.read().await.is_none() { + return Err(anyhow!("failed waiting for client")); } - Err(e) => { - return Err(anyhow::Error::from(e)); - } - } - - self.check_block_data_limit(&authorization).await?; - - { - // trace!("locking for ready..."); - let mut provider_state = self.provider_state.write().await; - // trace!("locked for ready..."); - - // TODO: do this without a clone - let ready_provider = provider_state - .provider(true) - .await - .context("provider missing")? - .clone(); - - *provider_state = ProviderState::Connected(ready_provider); - // trace!("unlocked for ready..."); - } - - info!("successfully connected to {}", self); + }; Ok(()) } - #[inline] - pub fn active_requests(&self) -> u32 { - self.active_requests.load(atomic::Ordering::Acquire) - } - async fn send_head_block_result( self: &Arc, new_head_block: Result, ProviderError>, @@ -558,7 +544,7 @@ impl Web3Rpc { if self.block_data_limit() == U64::zero() { let authorization = Arc::new(Authorization::internal(self.db_conn.clone())?); - if let Err(err) = self.check_block_data_limit(&authorization).await { + if let Err(err) = self.check_block_data_limit(&authorization, None).await { warn!( "failed checking block limit after {} finished syncing. {:?}", self, err @@ -629,29 +615,21 @@ impl Web3Rpc { // provider is ready ready_tx.send(()).unwrap(); - // wait before doing the initial health check // TODO: how often? - // TODO: subscribe to self.head_block + // TODO: reset this timeout every time a new block is seen let health_sleep_seconds = 10; + + // wait before doing the initial health check sleep(Duration::from_secs(health_sleep_seconds)).await; loop { // TODO: what if we just happened to have this check line up with another restart? // TODO: think more about this - // trace!("health check on {}. locking...", conn); - if conn - .provider_state - .read() - .await - .provider(false) - .await - .is_none() - { + if let Some(client) = &*conn.new_head_client.read().await { // trace!("health check unlocked with error on {}", conn); // returning error will trigger a reconnect - return Err(anyhow::anyhow!("{} is not ready", conn)); + // TODO: do a query of some kind } - // trace!("health check on {}. unlocked", conn); sleep(Duration::from_secs(health_sleep_seconds)).await; } @@ -712,7 +690,7 @@ impl Web3Rpc { Ok(()) } - /// Subscribe to new blocks. If `reconnect` is true, this runs forever. + /// Subscribe to new blocks. async fn subscribe_new_heads( self: Arc, authorization: Arc, @@ -722,233 +700,218 @@ impl Web3Rpc { ) -> anyhow::Result<()> { trace!("watching new heads on {}", self); - // trace!("locking on new heads"); - let provider_state = self - .provider_state - .try_read() - .context("subscribe_new_heads")? - .clone(); - // trace!("unlocked on new heads"); + let unlocked_provider = self.new_head_client.read().await; - // TODO: need a timeout - if let ProviderState::Connected(provider) = provider_state { - match provider.as_ref() { - Web3Provider::Mock => unimplemented!(), - Web3Provider::Http(_provider) => { - // there is a "watch_blocks" function, but a lot of public nodes do not support the necessary rpc endpoints - // TODO: try watch_blocks and fall back to this? 
+ match unlocked_provider.as_deref() { + Some(Web3Provider::Http(_client)) => { + // there is a "watch_blocks" function, but a lot of public nodes do not support the necessary rpc endpoints + // TODO: try watch_blocks and fall back to this? - let mut http_interval_receiver = http_interval_receiver.unwrap(); + let mut http_interval_receiver = http_interval_receiver.unwrap(); - let mut last_hash = H256::zero(); + let mut last_hash = H256::zero(); - loop { - // TODO: what should the max_wait be? - match self - .wait_for_request_handle(&authorization, None, false) - .await - { - Ok(active_request_handle) => { - let block: Result, _> = active_request_handle - .request( - "eth_getBlockByNumber", - &json!(("latest", false)), - Level::Warn.into(), + loop { + // TODO: what should the max_wait be? + match self + .wait_for_request_handle(&authorization, None, unlocked_provider.clone()) + .await + { + Ok(active_request_handle) => { + let block: Result, _> = active_request_handle + .request( + "eth_getBlockByNumber", + &json!(("latest", false)), + Level::Warn.into(), + None, + ) + .await; + + match block { + Ok(None) => { + warn!("no head block on {}", self); + + self.send_head_block_result( + Ok(None), + &block_sender, + block_map.clone(), ) - .await; + .await?; + } + Ok(Some(block)) => { + // don't send repeat blocks + let new_hash = + block.hash.expect("blocks here should always have hashes"); - match block { - Ok(None) => { - warn!("no head block on {}", self); + if new_hash != last_hash { + // new hash! + last_hash = new_hash; self.send_head_block_result( - Ok(None), - &block_sender, - block_map.clone(), - ) - .await?; - } - Ok(Some(block)) => { - // don't send repeat blocks - let new_hash = block - .hash - .expect("blocks here should always have hashes"); - - if new_hash != last_hash { - // new hash! - last_hash = new_hash; - - self.send_head_block_result( - Ok(Some(block)), - &block_sender, - block_map.clone(), - ) - .await?; - } - } - Err(err) => { - // we did not get a block back. something is up with the server. take it out of rotation - self.send_head_block_result( - Err(err), + Ok(Some(block)), &block_sender, block_map.clone(), ) .await?; } } + Err(err) => { + // we did not get a block back. something is up with the server. take it out of rotation + self.send_head_block_result( + Err(err), + &block_sender, + block_map.clone(), + ) + .await?; + } } - Err(err) => { - warn!("Internal error on latest block from {}. {:?}", self, err); + } + Err(err) => { + warn!("Internal error on latest block from {}. {:?}", self, err); - self.send_head_block_result( - Ok(None), - &block_sender, - block_map.clone(), - ) + self.send_head_block_result(Ok(None), &block_sender, block_map.clone()) .await?; - // TODO: what should we do? sleep? extra time? - } + // TODO: what should we do? sleep? extra time? } + } - // wait for the next interval - // TODO: if error or rate limit, increase interval? - while let Err(err) = http_interval_receiver.recv().await { - match err { - broadcast::error::RecvError::Closed => { - // channel is closed! that's not good. bubble the error up - return Err(err.into()); - } - broadcast::error::RecvError::Lagged(lagged) => { - // querying the block was delayed - // this can happen if tokio is very busy or waiting for requests limits took too long - warn!("http interval on {} lagging by {}!", self, lagged); - } + // wait for the next interval + // TODO: if error or rate limit, increase interval? 
+ while let Err(err) = http_interval_receiver.recv().await { + match err { + broadcast::error::RecvError::Closed => { + // channel is closed! that's not good. bubble the error up + return Err(err.into()); + } + broadcast::error::RecvError::Lagged(lagged) => { + // querying the block was delayed + // this can happen if tokio is very busy or waiting for requests limits took too long + warn!("http interval on {} lagging by {}!", self, lagged); } } } } - Web3Provider::Ws(provider) => { - // todo: move subscribe_blocks onto the request handle? - let active_request_handle = self - .wait_for_request_handle(&authorization, None, false) - .await; - let mut stream = provider.subscribe_blocks().await?; - drop(active_request_handle); - - // query the block once since the subscription doesn't send the current block - // there is a very small race condition here where the stream could send us a new block right now - // all it does is print "new block" for the same block as current block - // TODO: how does this get wrapped in an arc? does ethers handle that? - let block: Result, _> = self - .wait_for_request_handle(&authorization, None, false) - .await? - .request( - "eth_getBlockByNumber", - &json!(("latest", false)), - Level::Warn.into(), - ) - .await; - - let mut last_hash = match &block { - Ok(Some(new_block)) => new_block - .hash - .expect("blocks should always have a hash here"), - _ => H256::zero(), - }; - - self.send_head_block_result(block, &block_sender, block_map.clone()) - .await?; - - while let Some(new_block) = stream.next().await { - // TODO: check the new block's hash to be sure we don't send dupes - let new_hash = new_block - .hash - .expect("blocks should always have a hash here"); - - if new_hash == last_hash { - // some rpcs like to give us duplicates. don't waste our time on them - continue; - } else { - last_hash = new_hash; - } - - self.send_head_block_result( - Ok(Some(Arc::new(new_block))), - &block_sender, - block_map.clone(), - ) - .await?; - } - - // clear the head block. this might not be needed, but it won't hurt - self.send_head_block_result(Ok(None), &block_sender, block_map) - .await?; - - // TODO: is this always an error? - // TODO: we probably don't want a warn and to return error - warn!("new_heads subscription to {} ended", self); - Err(anyhow::anyhow!("new_heads subscription ended")) - } } - } else { - Err(anyhow::anyhow!( - "Provider not ready! Unable to subscribe to heads" - )) + Some(Web3Provider::Both(_, client)) | Some(Web3Provider::Ws(client)) => { + // todo: move subscribe_blocks onto the request handle? + let active_request_handle = self + .wait_for_request_handle(&authorization, None, unlocked_provider.clone()) + .await; + let mut stream = client.subscribe_blocks().await?; + drop(active_request_handle); + + // query the block once since the subscription doesn't send the current block + // there is a very small race condition here where the stream could send us a new block right now + // but all that does is print "new block" for the same block as current block + // TODO: how does this get wrapped in an arc? does ethers handle that? + // TODO: do this part over http? + let block: Result, _> = self + .wait_for_request_handle(&authorization, None, unlocked_provider.clone()) + .await? 
+ .request( + "eth_getBlockByNumber", + &json!(("latest", false)), + Level::Warn.into(), + unlocked_provider.clone(), + ) + .await; + + let mut last_hash = match &block { + Ok(Some(new_block)) => new_block + .hash + .expect("blocks should always have a hash here"), + _ => H256::zero(), + }; + + self.send_head_block_result(block, &block_sender, block_map.clone()) + .await?; + + while let Some(new_block) = stream.next().await { + // TODO: check the new block's hash to be sure we don't send dupes + let new_hash = new_block + .hash + .expect("blocks should always have a hash here"); + + if new_hash == last_hash { + // some rpcs like to give us duplicates. don't waste our time on them + continue; + } else { + last_hash = new_hash; + } + + self.send_head_block_result( + Ok(Some(Arc::new(new_block))), + &block_sender, + block_map.clone(), + ) + .await?; + } + + // clear the head block. this might not be needed, but it won't hurt + self.send_head_block_result(Ok(None), &block_sender, block_map) + .await?; + + // TODO: is this always an error? + // TODO: we probably don't want a warn and to return error + warn!("new_heads subscription to {} ended", self); + Err(anyhow::anyhow!("new_heads subscription ended")) + } + None => todo!("what should happen now? wait for a connection?"), + #[cfg(test)] + Some(Web3Provider::Mock) => unimplemented!(), } } + /// Turn on the firehose of pending transactions async fn subscribe_pending_transactions( self: Arc, authorization: Arc, tx_id_sender: flume::Sender<(TxHash, Arc)>, ) -> anyhow::Result<()> { - if let ProviderState::Connected(provider) = self - .provider_state - .try_read() - .context("subscribe_pending_transactions")? - .clone() - { - trace!("watching pending transactions on {}", self); - // TODO: does this keep the lock open for too long? - match provider.as_ref() { - Web3Provider::Mock => unimplemented!(), - Web3Provider::Http(provider) => { - // there is a "watch_pending_transactions" function, but a lot of public nodes do not support the necessary rpc endpoints - // TODO: maybe subscribe to self.head_block? - // TODO: this keeps a read lock guard open on provider_state forever. is that okay for an http client? - futures::future::pending::<()>().await; - } - Web3Provider::Ws(provider) => { - // TODO: maybe the subscribe_pending_txs function should be on the active_request_handle - let active_request_handle = self - .wait_for_request_handle(&authorization, None, false) - .await?; + // TODO: give this a separate client. don't use new_head_client for everything. especially a firehose this big + // TODO: timeout + let provider = self.new_head_client.read().await; - let mut stream = provider.subscribe_pending_txs().await?; - - drop(active_request_handle); - - while let Some(pending_tx_id) = stream.next().await { - tx_id_sender - .send_async((pending_tx_id, self.clone())) - .await - .context("tx_id_sender")?; - - // TODO: periodically check for listeners. if no one is subscribed, unsubscribe and wait for a subscription - } - - // TODO: is this always an error? - // TODO: we probably don't want a warn and to return error - warn!("pending_transactions subscription ended on {}", self); - return Err(anyhow::anyhow!("pending_transactions subscription ended")); - } + trace!("watching pending transactions on {}", self); + // TODO: does this keep the lock open for too long? + match provider.as_deref() { + None => { + // TODO: wait for a provider + return Err(anyhow!("no provider")); } - } else { - warn!( - "Provider not ready! 
Unable to watch pending transactions on {}", - self - ); + Some(Web3Provider::Http(provider)) => { + // there is a "watch_pending_transactions" function, but a lot of public nodes do not support the necessary rpc endpoints + // TODO: maybe subscribe to self.head_block? + // TODO: this keeps a read lock guard open on provider_state forever. is that okay for an http client? + futures::future::pending::<()>().await; + } + Some(Web3Provider::Both(_, client)) | Some(Web3Provider::Ws(client)) => { + // TODO: maybe the subscribe_pending_txs function should be on the active_request_handle + let active_request_handle = self + .wait_for_request_handle(&authorization, None, provider.clone()) + .await?; + + let mut stream = client.subscribe_pending_txs().await?; + + drop(active_request_handle); + + while let Some(pending_tx_id) = stream.next().await { + tx_id_sender + .send_async((pending_tx_id, self.clone())) + .await + .context("tx_id_sender")?; + + // TODO: periodically check for listeners. if no one is subscribed, unsubscribe and wait for a subscription + } + + // TODO: is this always an error? + // TODO: we probably don't want a warn and to return error + warn!("pending_transactions subscription ended on {}", self); + return Err(anyhow::anyhow!("pending_transactions subscription ended")); + } + #[cfg(test)] + Some(Web3Provider::Mock) => futures::future::pending::<()>().await, } Ok(()) @@ -957,17 +920,17 @@ impl Web3Rpc { /// be careful with this; it might wait forever! /// `allow_not_ready` is only for use by health checks while starting the provider /// TODO: don't use anyhow. use specific error type - pub async fn wait_for_request_handle( - self: &Arc, - authorization: &Arc, + pub async fn wait_for_request_handle<'a>( + self: &'a Arc, + authorization: &'a Arc, max_wait: Option, - allow_not_ready: bool, + unlocked_provider: Option>, ) -> anyhow::Result { let max_wait = max_wait.map(|x| Instant::now() + x); loop { match self - .try_request_handle(authorization, allow_not_ready) + .try_request_handle(authorization, unlocked_provider.clone()) .await { Ok(OpenRequestResult::Handle(handle)) => return Ok(handle), @@ -1015,20 +978,14 @@ impl Web3Rpc { pub async fn try_request_handle( self: &Arc, authorization: &Arc, - // TODO? ready_provider: Option<&Arc>, - allow_not_ready: bool, + // TODO: borrow on this instead of needing to clone the Arc? + unlocked_provider: Option>, ) -> anyhow::Result { // TODO: think more about this read block - if !allow_not_ready - && self - .provider_state - .read() - .await - .provider(allow_not_ready) - .await - .is_none() - { - trace!("{} is not ready", self); + // TODO: this should *not* be new_head_client. this should be a separate object + if unlocked_provider.is_some() || self.new_head_client.read().await.is_some() { + // we already have an unlocked provider. no need to lock + } else { return Ok(OpenRequestResult::NotReady(self.backup)); } @@ -1144,15 +1101,11 @@ impl Serialize for Web3Rpc { state.serialize_field("soft_limit", &self.soft_limit)?; - state.serialize_field( - "active_requests", - &self.active_requests.load(atomic::Ordering::Relaxed), - )?; - - state.serialize_field( - "total_requests", - &self.frontend_requests.load(atomic::Ordering::Relaxed), - )?; + // TODO: keep this for the "popularity_contest" command? or maybe better to just use graphana? + // state.serialize_field( + // "frontend_requests", + // &self.frontend_requests.load(atomic::Ordering::Relaxed), + // )?; { // TODO: maybe this is too much data. serialize less? 
@@ -1216,7 +1169,7 @@ mod tests { let x = Web3Rpc { name: "name".to_string(), - url: "ws://example.com".to_string(), + ws_url: Some("ws://example.com".to_string()), soft_limit: 1_000, automatic_block_limit: false, backup: false, diff --git a/web3_proxy/src/rpcs/provider.rs b/web3_proxy/src/rpcs/provider.rs index add17a43..a65c7cea 100644 --- a/web3_proxy/src/rpcs/provider.rs +++ b/web3_proxy/src/rpcs/provider.rs @@ -2,22 +2,45 @@ use anyhow::Context; use derive_more::From; use std::time::Duration; +// TODO: our own structs for these that handle streaming large responses +type EthersHttpProvider = ethers::providers::Provider; +type EthersWsProvider = ethers::providers::Provider; + /// Use HTTP and WS providers. // TODO: instead of an enum, I tried to use Box, but hit +// TODO: custom types that let us stream JSON responses #[derive(From)] pub enum Web3Provider { - Http(ethers::providers::Provider), - Ws(ethers::providers::Provider), - // TODO: only include this for tests. + Both(EthersHttpProvider, EthersWsProvider), + Http(EthersHttpProvider), + // TODO: deadpool? custom tokio-tungstenite + Ws(EthersWsProvider), + #[cfg(test)] Mock, } impl Web3Provider { pub fn ready(&self) -> bool { match self { - Self::Mock => true, + Self::Both(_, ws) => ws.as_ref().ready(), Self::Http(_) => true, - Self::Ws(provider) => provider.as_ref().ready(), + Self::Ws(ws) => ws.as_ref().ready(), + #[cfg(test)] + Self::Mock => true, + } + } + + pub fn http(&self) -> Option<&EthersHttpProvider> { + match self { + Self::Http(x) => Some(x), + _ => None, + } + } + + pub fn ws(&self) -> Option<&EthersWsProvider> { + match self { + Self::Both(_, x) | Self::Ws(x) => Some(x), + _ => None, } } diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index da204992..e9d4baf0 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -1,6 +1,6 @@ use super::one::Web3Rpc; use super::provider::Web3Provider; -use crate::frontend::authorization::{Authorization, AuthorizationType}; +use crate::frontend::authorization::Authorization; use anyhow::Context; use chrono::Utc; use entities::revert_log; @@ -11,7 +11,6 @@ use log::{debug, error, trace, warn, Level}; use migration::sea_orm::{self, ActiveEnum, ActiveModelTrait}; use serde_json::json; use std::fmt; -use std::sync::atomic; use std::sync::Arc; use thread_fast_rng::rand::Rng; use tokio::time::{sleep, Duration, Instant}; @@ -27,11 +26,11 @@ pub enum OpenRequestResult { } /// Make RPC requests through this handle and drop it when you are done. +/// Opening this handle checks rate limits. Developers, try to keep opening a handle and using it as close together as possible #[derive(Debug)] pub struct OpenRequestHandle { authorization: Arc, conn: Arc, - provider: Arc, } /// Depending on the context, RPC errors can require different handling. @@ -123,60 +122,9 @@ impl Authorization { impl OpenRequestHandle { pub async fn new(authorization: Arc, conn: Arc) -> Self { - // TODO: take request_id as an argument? - // TODO: attach a unique id to this? customer requests have one, but not internal queries - // TODO: what ordering?! - conn.active_requests.fetch_add(1, atomic::Ordering::Relaxed); - - let mut provider = None; - let mut logged = false; - while provider.is_none() { - // trace!("waiting on provider: locking..."); - - let ready_provider = conn - .provider_state - .read() - .await - // TODO: hard code true, or take a bool in the `new` function? 
- .provider(true) - .await - .cloned(); - // trace!("waiting on provider: unlocked!"); - - match ready_provider { - None => { - if !logged { - logged = true; - warn!("no provider for {}!", conn); - } - - // TODO: how should this work? a reconnect should be in progress. but maybe force one now? - // TODO: sleep how long? subscribe to something instead? maybe use a watch handle? - // TODO: this is going to be way too verbose! - sleep(Duration::from_millis(100)).await - } - Some(x) => provider = Some(x), - } - } - let provider = provider.expect("provider was checked already"); - - // TODO: handle overflows? - // TODO: what ordering? - match authorization.as_ref().authorization_type { - AuthorizationType::Frontend => { - conn.frontend_requests - .fetch_add(1, atomic::Ordering::Relaxed); - } - AuthorizationType::Internal => { - conn.internal_requests - .fetch_add(1, atomic::Ordering::Relaxed); - } - } - Self { authorization, conn, - provider, } } @@ -196,6 +144,7 @@ impl OpenRequestHandle { method: &str, params: &P, revert_handler: RequestRevertHandler, + unlocked_provider: Option>, ) -> Result where // TODO: not sure about this type. would be better to not need clones, but measure and spawns combine to need it @@ -205,12 +154,45 @@ impl OpenRequestHandle { // TODO: use tracing spans // TODO: including params in this log is way too verbose // trace!(rpc=%self.conn, %method, "request"); + trace!("requesting from {}", self.conn); + + let mut provider: Option> = None; + let mut logged = false; + while provider.is_none() { + // trace!("waiting on provider: locking..."); + + // TODO: this should *not* be new_head_client. that is dedicated to only new heads + if let Some(unlocked_provider) = unlocked_provider { + provider = Some(unlocked_provider); + break; + } + + let unlocked_provider = self.conn.new_head_client.read().await; + + if let Some(unlocked_provider) = unlocked_provider.clone() { + provider = Some(unlocked_provider); + break; + } + + if !logged { + debug!("no provider for open handle on {}", self.conn); + logged = true; + } + + sleep(Duration::from_millis(100)).await; + } + + let provider = provider.expect("provider was checked already"); // TODO: replace ethers-rs providers with our own that supports streaming the responses - let response = match &*self.provider { + let response = match provider.as_ref() { + #[cfg(test)] Web3Provider::Mock => unimplemented!(), - Web3Provider::Http(provider) => provider.request(method, params).await, - Web3Provider::Ws(provider) => provider.request(method, params).await, + Web3Provider::Ws(p) => p.request(method, params).await, + Web3Provider::Http(p) | Web3Provider::Both(p, _) => { + // TODO: i keep hearing that http is faster. but ws has always been better for me. 
investigate more with actual benchmarks + p.request(method, params).await + } }; // // TODO: i think ethers already has trace logging (and does it much more fancy) @@ -266,8 +248,22 @@ impl OpenRequestHandle { // check for "execution reverted" here let response_type = if let ProviderError::JsonRpcClientError(err) = err { // Http and Ws errors are very similar, but different types - let msg = match &*self.provider { + let msg = match &*provider { + #[cfg(test)] Web3Provider::Mock => unimplemented!(), + Web3Provider::Both(_, _) => { + if let Some(HttpClientError::JsonRpcError(err)) = + err.downcast_ref::() + { + Some(&err.message) + } else if let Some(WsClientError::JsonRpcError(err)) = + err.downcast_ref::() + { + Some(&err.message) + } else { + None + } + } Web3Provider::Http(_) => { if let Some(HttpClientError::JsonRpcError(err)) = err.downcast_ref::() @@ -377,11 +373,3 @@ impl OpenRequestHandle { response } } - -impl Drop for OpenRequestHandle { - fn drop(&mut self) { - self.conn - .active_requests - .fetch_sub(1, atomic::Ordering::AcqRel); - } -} diff --git a/web3_proxy/src/rpcs/transactions.rs b/web3_proxy/src/rpcs/transactions.rs index dc5710d1..466a92be 100644 --- a/web3_proxy/src/rpcs/transactions.rs +++ b/web3_proxy/src/rpcs/transactions.rs @@ -28,13 +28,15 @@ impl Web3Rpcs { // TODO: might not be a race. might be a nonce thats higher than the current account nonce. geth discards chains // TODO: yearn devs have had better luck with batching these, but i think that's likely just adding a delay itself // TODO: if one rpc fails, try another? - let tx: Transaction = match rpc.try_request_handle(authorization, false).await { + // TODO: try_request_handle, or wait_for_request_handle? I think we want wait here + let tx: Transaction = match rpc.try_request_handle(authorization, None).await { Ok(OpenRequestResult::Handle(handle)) => { handle .request( "eth_getTransactionByHash", &(pending_tx_id,), Level::Error.into(), + None, ) .await? 
} diff --git a/web3_proxy/src/rpcs/ws.rs b/web3_proxy/src/rpcs/ws.rs new file mode 100644 index 00000000..e69de29b From 0e2e45cf90fba37ee6d0b6e5bbb291b08d10c9a1 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Sun, 12 Feb 2023 01:47:06 -0800 Subject: [PATCH 19/47] cargo upgrade --- Cargo.lock | 12 ++++++------ deferred-rate-limiter/Cargo.toml | 2 +- web3_proxy/Cargo.toml | 6 +++--- web3_proxy/src/rpcs/many.rs | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0196af87..89548b54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -282,9 +282,9 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "axum" -version = "0.6.4" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5694b64066a2459918d8074c2ce0d5a88f409431994c2356617c8ae0c4721fc" +checksum = "3114e77b361ec716aa429ae5c04243abe00cf7548e870b9370affcc5c491a7d0" dependencies = [ "async-trait", "axum-core", @@ -347,9 +347,9 @@ dependencies = [ [[package]] name = "axum-macros" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dbcf61bed07d554bd5c225cd07bc41b793eab63e79c6f0ceac7e1aed2f1c670" +checksum = "cb6bee4e05a5e0a5a67515ab24978efa7a80575a7a41a9fae35bb27fed6645d2" dependencies = [ "heck 0.4.0", "proc-macro2", @@ -2891,9 +2891,9 @@ dependencies = [ [[package]] name = "moka" -version = "0.9.7" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19b9268097a2cf211ac9955b1cc95e80fa84fff5c2d13ba292916445dc8a311f" +checksum = "2b6446f16d504e3d575df79cabb11bfbe9f24b17e9562d964a815db7b28ae3ec" dependencies = [ "async-io", "async-lock", diff --git a/deferred-rate-limiter/Cargo.toml b/deferred-rate-limiter/Cargo.toml index 9b7c4ad8..04cb8488 100644 --- a/deferred-rate-limiter/Cargo.toml +++ b/deferred-rate-limiter/Cargo.toml @@ -10,5 +10,5 @@ redis-rate-limiter = { path = "../redis-rate-limiter" } anyhow = "1.0.69" hashbrown = "0.13.2" log = "0.4.17" -moka = { version = "0.9.7", default-features = false, features = ["future"] } +moka = { version = "0.10.0", default-features = false, features = ["future"] } tokio = "1.25.0" diff --git a/web3_proxy/Cargo.toml b/web3_proxy/Cargo.toml index 9ebe64e5..e4a9b93c 100644 --- a/web3_proxy/Cargo.toml +++ b/web3_proxy/Cargo.toml @@ -27,9 +27,9 @@ thread-fast-rng = { path = "../thread-fast-rng" } anyhow = { version = "1.0.69", features = ["backtrace"] } argh = "0.1.10" -axum = { version = "0.6.4", features = ["headers", "ws"] } +axum = { version = "0.6.5", features = ["headers", "ws"] } axum-client-ip = "0.4.0" -axum-macros = "0.3.2" +axum-macros = "0.3.3" chrono = "0.4.23" counter = "0.5.7" derive_more = "0.99.17" @@ -48,7 +48,7 @@ http = "0.2.8" ipnet = "2.7.1" itertools = "0.10.5" log = "0.4.17" -moka = { version = "0.9.7", default-features = false, features = ["future"] } +moka = { version = "0.10.0", default-features = false, features = ["future"] } notify = "5.1.0" num = "0.4.0" num-traits = "0.2.15" diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index 086ce81b..087ab95b 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -541,7 +541,7 @@ impl Web3Rpcs { }) .collect(); - debug!("todo: better sort here"); + trace!("todo: better sort here"); let sorted_rpcs = { if usable_rpcs.len() == 1 { From 70105bc7bbfb2799bf6153d86d117cf45f864c9c Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Sun, 12 Feb 2023 10:22:20 -0800 
Subject: [PATCH 20/47] variable rename --- TODO.md | 3 +++ web3_proxy/src/rpcs/many.rs | 2 +- web3_proxy/src/rpcs/one.rs | 14 +++++++------- web3_proxy/src/rpcs/request.rs | 32 ++++++++++++++++---------------- 4 files changed, 27 insertions(+), 24 deletions(-) diff --git a/TODO.md b/TODO.md index 10974a82..5f3e18f3 100644 --- a/TODO.md +++ b/TODO.md @@ -332,6 +332,9 @@ These are not yet ordered. There might be duplicates. We might not actually need - [x] add archive depth to app config - [x] use from_block and to_block so that eth_getLogs is routed correctly - [x] improve eth_sendRawTransaction server selection +- [x] don't cache methods that are usually very large +- [x] use http provider when available +- [ ] don't use new_head_provider anywhere except new head subscription - [-] proxy mode for benchmarking all backends - [-] proxy mode for sending to multiple backends - [-] let users choose a % of reverts to log (or maybe x/second). someone like curve logging all reverts will be a BIG database very quickly diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index 087ab95b..83eb3922 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -541,7 +541,7 @@ impl Web3Rpcs { }) .collect(); - trace!("todo: better sort here"); + warn!("todo: better sort here"); let sorted_rpcs = { if usable_rpcs.len() == 1 { diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index c1db0ad5..dfa08a4f 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -64,7 +64,7 @@ pub struct Web3Rpc { /// it is an async lock because we hold it open across awaits /// this provider is only used for new heads subscriptions /// TODO: put the provider inside an arc? - pub(super) new_head_client: AsyncRwLock>>, + pub(super) provider: AsyncRwLock>>, /// keep track of hard limits pub(super) hard_limit_until: Option>, /// rate limits are stored in a central redis so that multiple proxies can share their rate limits @@ -397,7 +397,7 @@ impl Web3Rpc { chain_id: u64, db_conn: Option<&DatabaseConnection>, ) -> anyhow::Result<()> { - if let Ok(mut unlocked_provider) = self.new_head_client.try_write() { + if let Ok(mut unlocked_provider) = self.provider.try_write() { #[cfg(test)] if let Some(Web3Provider::Mock) = unlocked_provider.as_deref() { return Ok(()); @@ -494,7 +494,7 @@ impl Web3Rpc { info!("successfully connected to {}", self); } else { - if self.new_head_client.read().await.is_none() { + if self.provider.read().await.is_none() { return Err(anyhow!("failed waiting for client")); } }; @@ -625,7 +625,7 @@ impl Web3Rpc { loop { // TODO: what if we just happened to have this check line up with another restart? // TODO: think more about this - if let Some(client) = &*conn.new_head_client.read().await { + if let Some(client) = &*conn.provider.read().await { // trace!("health check unlocked with error on {}", conn); // returning error will trigger a reconnect // TODO: do a query of some kind @@ -700,7 +700,7 @@ impl Web3Rpc { ) -> anyhow::Result<()> { trace!("watching new heads on {}", self); - let unlocked_provider = self.new_head_client.read().await; + let unlocked_provider = self.provider.read().await; match unlocked_provider.as_deref() { Some(Web3Provider::Http(_client)) => { @@ -871,7 +871,7 @@ impl Web3Rpc { ) -> anyhow::Result<()> { // TODO: give this a separate client. don't use new_head_client for everything. 
especially a firehose this big // TODO: timeout - let provider = self.new_head_client.read().await; + let provider = self.provider.read().await; trace!("watching pending transactions on {}", self); // TODO: does this keep the lock open for too long? @@ -983,7 +983,7 @@ impl Web3Rpc { ) -> anyhow::Result { // TODO: think more about this read block // TODO: this should *not* be new_head_client. this should be a separate object - if unlocked_provider.is_some() || self.new_head_client.read().await.is_some() { + if unlocked_provider.is_some() || self.provider.read().await.is_some() { // we already have an unlocked provider. no need to lock } else { return Ok(OpenRequestResult::NotReady(self.backup)); diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index e9d4baf0..2c66307e 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -30,7 +30,7 @@ pub enum OpenRequestResult { #[derive(Debug)] pub struct OpenRequestHandle { authorization: Arc, - conn: Arc, + rpc: Arc, } /// Depending on the context, RPC errors can require different handling. @@ -124,17 +124,17 @@ impl OpenRequestHandle { pub async fn new(authorization: Arc, conn: Arc) -> Self { Self { authorization, - conn, + rpc: conn, } } pub fn connection_name(&self) -> String { - self.conn.name.clone() + self.rpc.name.clone() } #[inline] pub fn clone_connection(&self) -> Arc { - self.conn.clone() + self.rpc.clone() } /// Send a web3 request @@ -154,7 +154,7 @@ impl OpenRequestHandle { // TODO: use tracing spans // TODO: including params in this log is way too verbose // trace!(rpc=%self.conn, %method, "request"); - trace!("requesting from {}", self.conn); + trace!("requesting from {}", self.rpc); let mut provider: Option> = None; let mut logged = false; @@ -167,7 +167,7 @@ impl OpenRequestHandle { break; } - let unlocked_provider = self.conn.new_head_client.read().await; + let unlocked_provider = self.rpc.provider.read().await; if let Some(unlocked_provider) = unlocked_provider.clone() { provider = Some(unlocked_provider); @@ -175,7 +175,7 @@ impl OpenRequestHandle { } if !logged { - debug!("no provider for open handle on {}", self.conn); + debug!("no provider for open handle on {}", self.rpc); logged = true; } @@ -286,10 +286,10 @@ impl OpenRequestHandle { if let Some(msg) = msg { if msg.starts_with("execution reverted") { - trace!("revert from {}", self.conn); + trace!("revert from {}", self.rpc); ResponseTypes::Revert } else if msg.contains("limit") || msg.contains("request") { - trace!("rate limit from {}", self.conn); + trace!("rate limit from {}", self.rpc); ResponseTypes::RateLimit } else { ResponseTypes::Ok @@ -302,10 +302,10 @@ impl OpenRequestHandle { }; if matches!(response_type, ResponseTypes::RateLimit) { - if let Some(hard_limit_until) = self.conn.hard_limit_until.as_ref() { + if let Some(hard_limit_until) = self.rpc.hard_limit_until.as_ref() { let retry_at = Instant::now() + Duration::from_secs(1); - trace!("retry {} at: {:?}", self.conn, retry_at); + trace!("retry {} at: {:?}", self.rpc, retry_at); hard_limit_until.send_replace(retry_at); } @@ -318,14 +318,14 @@ impl OpenRequestHandle { if matches!(response_type, ResponseTypes::Revert) { debug!( "bad response from {}! method={} params={:?} err={:?}", - self.conn, method, params, err + self.rpc, method, params, err ); } } RequestRevertHandler::TraceLevel => { trace!( "bad response from {}! 
method={} params={:?} err={:?}", - self.conn, + self.rpc, method, params, err @@ -335,20 +335,20 @@ impl OpenRequestHandle { // TODO: include params if not running in release mode error!( "bad response from {}! method={} err={:?}", - self.conn, method, err + self.rpc, method, err ); } RequestRevertHandler::WarnLevel => { // TODO: include params if not running in release mode warn!( "bad response from {}! method={} err={:?}", - self.conn, method, err + self.rpc, method, err ); } RequestRevertHandler::Save => { trace!( "bad response from {}! method={} params={:?} err={:?}", - self.conn, + self.rpc, method, params, err From 0d4acfbaccfa2a3cc3141cdf408a978006041d41 Mon Sep 17 00:00:00 2001 From: yenicelik Date: Tue, 14 Feb 2023 17:15:54 +0100 Subject: [PATCH 21/47] still struggingling with login --- scripts/manual-tests/16-change-user-tier.sh | 31 +++++++++++++++------ web3_proxy/src/user_queries.rs | 1 + 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/scripts/manual-tests/16-change-user-tier.sh b/scripts/manual-tests/16-change-user-tier.sh index 42791bd3..7e865207 100644 --- a/scripts/manual-tests/16-change-user-tier.sh +++ b/scripts/manual-tests/16-change-user-tier.sh @@ -2,24 +2,37 @@ # rm -rf data/ # sea-orm-cli migrate up -# Use CLI to create a user +# Use CLI to create the admin that will call the endpoint RUSTFLAGS="--cfg tokio_unstable" cargo run create_user --address 0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a RUSTFLAGS="--cfg tokio_unstable" cargo run change_admin_status 0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a true +# Use CLI to create the user whose role will be changed via the endpoint +RUSTFLAGS="--cfg tokio_unstable" cargo run create_user --address 0x077e43dcca20da9859daa3fd78b5998b81f794f7 + # Run the proxyd instance -# cargo run --release -- proxyd +RUSTFLAGS="--cfg tokio_unstable" cargo run --release -- proxyd # Check if the instance is running -# curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"web3_clientVersion","id":1}' 127.0.0.1:8544 +curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"web3_clientVersion","id":1}' 127.0.0.1:8544 + +# Login in the user first (add a random bearer token into the database) +# (This segment was not yet tested, but should next time you run the query) +INSERT INTO login (bearer_token, user_id, expires_at, read_only) VALUES ( + "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c", + 1, + "2222-01-01", + FALSE +); -# Login as user first -curl -X GET "127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a" #curl -X POST -H "Content-Type: application/json" --data '{}' 127.0.0.1:8544/user/login -curl -X GET "127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a/" +#curl -X GET "127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a/" #curl -X GET "127.0.0.1:8544/admin/modify_role?user_address=0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a&user_tier_title=Unlimited" - # Now modify the user role and check this in the database -curl -X GET "127.0.0.1:8544/admin/modify_role?user_address=0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a&user_tier_title=Unlimited" +curl \ +-H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9" \ +-X GET "127.0.0.1:8544/admin/modify_role?user_address=0x077e43dcca20da9859daa3fd78b5998b81f794f7&user_tier_title=1&user_id=1" -# docker-compose down +curl \ +-H "Authorization: Bearer 
QWxhZGRpbjpvcGVuIHNlc2FtZQ==" \ +-X GET "127.0.0.1:8544/admin/modify_role?user_address=0x077e43dcca20da9859daa3fd78b5998b81f794f7&user_tier_title=Unlimited&user_id=1" diff --git a/web3_proxy/src/user_queries.rs b/web3_proxy/src/user_queries.rs index f777f857..ad448b6c 100644 --- a/web3_proxy/src/user_queries.rs +++ b/web3_proxy/src/user_queries.rs @@ -35,6 +35,7 @@ pub async fn get_user_id_from_params( bearer: Option>>, params: &HashMap, ) -> Result { + debug!("bearer and params are: {:?} {:?}", bearer, params); match (bearer, params.get("user_id")) { (Some(TypedHeader(Authorization(bearer))), Some(user_id)) => { // check for the bearer cache key From 824a6fa1f0ba93ae30a67e0ceb7e3430dd3c9500 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Tue, 14 Feb 2023 12:14:50 -0800 Subject: [PATCH 22/47] well it compiles. doesnt work yet. but it compiles --- Cargo.lock | 25 +- web3_proxy/Cargo.toml | 7 +- web3_proxy/src/app/mod.rs | 35 +- web3_proxy/src/app/ws.rs | 2 +- web3_proxy/src/block_number.rs | 7 +- web3_proxy/src/config.rs | 14 +- web3_proxy/src/rpcs/blockchain.rs | 607 +++++----------------- web3_proxy/src/rpcs/consensus.rs | 527 +++++++++++++++++++ web3_proxy/src/rpcs/many.rs | 100 ++-- web3_proxy/src/rpcs/mod.rs | 2 +- web3_proxy/src/rpcs/one.rs | 19 +- web3_proxy/src/rpcs/synced_connections.rs | 71 --- 12 files changed, 759 insertions(+), 657 deletions(-) create mode 100644 web3_proxy/src/rpcs/consensus.rs delete mode 100644 web3_proxy/src/rpcs/synced_connections.rs diff --git a/Cargo.lock b/Cargo.lock index 89548b54..48d61556 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -282,13 +282,13 @@ checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "axum" -version = "0.6.5" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3114e77b361ec716aa429ae5c04243abe00cf7548e870b9370affcc5c491a7d0" +checksum = "4e246206a63c9830e118d12c894f56a82033da1a2361f5544deeee3df85c99d9" dependencies = [ "async-trait", "axum-core", - "base64 0.20.0", + "base64 0.21.0", "bitflags", "bytes", "futures-util", @@ -347,9 +347,9 @@ dependencies = [ [[package]] name = "axum-macros" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6bee4e05a5e0a5a67515ab24978efa7a80575a7a41a9fae35bb27fed6645d2" +checksum = "5fbf955307ff8addb48d2399393c9e2740dd491537ec562b66ab364fc4a38841" dependencies = [ "heck 0.4.0", "proc-macro2", @@ -419,12 +419,6 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" -[[package]] -name = "base64" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5" - [[package]] name = "base64" version = "0.21.0" @@ -3093,9 +3087,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.0" +version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "opaque-debug" @@ -4520,9 +4514,9 @@ dependencies = [ [[package]] name = "serde_prometheus" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb6048d9e4ebc41f7d1a42c79b04c5b460633be307620a0e34a8f81970ea47" +checksum = 
"9c1a4ca38f4e746460d1dbd3711b8ca8ae314d1b21247edeff61dd20325b5a6f" dependencies = [ "heapless", "nom", @@ -5793,6 +5787,7 @@ dependencies = [ "notify", "num", "num-traits", + "once_cell", "pagerduty-rs", "parking_lot 0.12.1", "prettytable", diff --git a/web3_proxy/Cargo.toml b/web3_proxy/Cargo.toml index e4a9b93c..7d8ab888 100644 --- a/web3_proxy/Cargo.toml +++ b/web3_proxy/Cargo.toml @@ -27,9 +27,9 @@ thread-fast-rng = { path = "../thread-fast-rng" } anyhow = { version = "1.0.69", features = ["backtrace"] } argh = "0.1.10" -axum = { version = "0.6.5", features = ["headers", "ws"] } +axum = { version = "0.6.6", features = ["headers", "ws"] } axum-client-ip = "0.4.0" -axum-macros = "0.3.3" +axum-macros = "0.3.4" chrono = "0.4.23" counter = "0.5.7" derive_more = "0.99.17" @@ -52,6 +52,7 @@ moka = { version = "0.10.0", default-features = false, features = ["future"] } notify = "5.1.0" num = "0.4.0" num-traits = "0.2.15" +once_cell = { version = "1.17.1" } pagerduty-rs = { version = "0.1.6", default-features = false, features = ["async", "rustls", "sync"] } parking_lot = { version = "0.12.1", features = ["arc_lock"] } prettytable = "*" @@ -62,7 +63,7 @@ rustc-hash = "1.1.0" sentry = { version = "0.29.3", default-features = false, features = ["backtrace", "contexts", "panic", "anyhow", "reqwest", "rustls", "log", "sentry-log"] } serde = { version = "1.0.152", features = [] } serde_json = { version = "1.0.93", default-features = false, features = ["alloc", "raw_value"] } -serde_prometheus = "0.2.0" +serde_prometheus = "0.2.1" siwe = "0.5.0" time = "0.3.17" tokio = { version = "1.25.0", features = ["full"] } diff --git a/web3_proxy/src/app/mod.rs b/web3_proxy/src/app/mod.rs index 91e9c95d..68ef6bb4 100644 --- a/web3_proxy/src/app/mod.rs +++ b/web3_proxy/src/app/mod.rs @@ -10,7 +10,7 @@ use crate::frontend::rpc_proxy_ws::ProxyMode; use crate::jsonrpc::{ JsonRpcForwardedResponse, JsonRpcForwardedResponseEnum, JsonRpcRequest, JsonRpcRequestEnum, }; -use crate::rpcs::blockchain::{ArcBlock, SavedBlock}; +use crate::rpcs::blockchain::{BlockHashesCache, Web3ProxyBlock}; use crate::rpcs::many::Web3Rpcs; use crate::rpcs::one::Web3Rpc; use crate::rpcs::transactions::TxStatus; @@ -23,7 +23,7 @@ use derive_more::From; use entities::sea_orm_active_enums::LogLevel; use entities::user; use ethers::core::utils::keccak256; -use ethers::prelude::{Address, Block, Bytes, Transaction, TxHash, H256, U64}; +use ethers::prelude::{Address, Bytes, Transaction, TxHash, H256, U64}; use ethers::types::U256; use ethers::utils::rlp::{Decodable, Rlp}; use futures::future::join_all; @@ -69,9 +69,9 @@ pub static REQUEST_PERIOD: u64 = 60; #[derive(From)] struct ResponseCacheKey { // if none, this is cached until evicted - from_block: Option, + from_block: Option, // to_block is only set when ranges of blocks are requested (like with eth_getLogs) - to_block: Option, + to_block: Option, method: String, // TODO: better type for this params: Option, @@ -204,7 +204,7 @@ pub struct Web3ProxyApp { response_cache: ResponseCache, // don't drop this or the sender will stop working // TODO: broadcast channel instead? - watch_consensus_head_receiver: watch::Receiver, + watch_consensus_head_receiver: watch::Receiver, pending_tx_sender: broadcast::Sender, pub config: AppConfig, pub db_conn: Option, @@ -542,7 +542,7 @@ impl Web3ProxyApp { // TODO: i don't like doing Block::default here! Change this to "None"? 
let (watch_consensus_head_sender, watch_consensus_head_receiver) = - watch::channel(Arc::new(Block::default())); + watch::channel(Web3ProxyBlock::default()); // TODO: will one receiver lagging be okay? how big should this be? let (pending_tx_sender, pending_tx_receiver) = broadcast::channel(256); @@ -563,11 +563,11 @@ impl Web3ProxyApp { // TODO: limits from config // these blocks don't have full transactions, but they do have rather variable amounts of transaction hashes // TODO: how can we do the weigher better? - let block_map = Cache::builder() + let block_map: BlockHashesCache = Cache::builder() .max_capacity(1024 * 1024 * 1024) - .weigher(|_k, v: &ArcBlock| { + .weigher(|_k, v: &Web3ProxyBlock| { // TODO: is this good enough? - 1 + v.transactions.len().try_into().unwrap_or(u32::MAX) + 1 + v.block.transactions.len().try_into().unwrap_or(u32::MAX) }) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); @@ -577,6 +577,8 @@ impl Web3ProxyApp { top_config.app.chain_id, db_conn.clone(), http_client.clone(), + top_config.app.max_block_age, + top_config.app.max_block_lag, top_config.app.min_synced_rpcs, top_config.app.min_sum_soft_limit, pending_transactions.clone(), @@ -603,6 +605,9 @@ impl Web3ProxyApp { top_config.app.chain_id, db_conn.clone(), http_client.clone(), + // private rpcs don't get subscriptions, so no need for max_block_age or max_block_lag + None, + None, 0, 0, pending_transactions.clone(), @@ -735,7 +740,7 @@ impl Web3ProxyApp { Ok((app, cancellable_handles, important_background_handles).into()) } - pub fn head_block_receiver(&self) -> watch::Receiver { + pub fn head_block_receiver(&self) -> watch::Receiver { self.watch_consensus_head_receiver.clone() } @@ -1481,7 +1486,7 @@ impl Web3ProxyApp { .await?; Some(ResponseCacheKey { - from_block: Some(SavedBlock::new(request_block)), + from_block: Some(request_block), to_block: None, method: method.to_string(), // TODO: hash here? @@ -1521,8 +1526,8 @@ impl Web3ProxyApp { .await?; Some(ResponseCacheKey { - from_block: Some(SavedBlock::new(from_block)), - to_block: Some(SavedBlock::new(to_block)), + from_block: Some(from_block), + to_block: Some(to_block), method: method.to_string(), // TODO: hash here? params: request.params.clone(), @@ -1537,8 +1542,8 @@ impl Web3ProxyApp { let authorization = authorization.clone(); if let Some(cache_key) = cache_key { - let from_block_num = cache_key.from_block.as_ref().map(|x| x.number()); - let to_block_num = cache_key.to_block.as_ref().map(|x| x.number()); + let from_block_num = cache_key.from_block.as_ref().map(|x| *x.number()); + let to_block_num = cache_key.to_block.as_ref().map(|x| *x.number()); self.response_cache .try_get_with(cache_key, async move { diff --git a/web3_proxy/src/app/ws.rs b/web3_proxy/src/app/ws.rs index 582ea814..e61db2c2 100644 --- a/web3_proxy/src/app/ws.rs +++ b/web3_proxy/src/app/ws.rs @@ -72,7 +72,7 @@ impl Web3ProxyApp { "params": { "subscription": subscription_id, // TODO: option to include full transaction objects instead of just the hashes? - "result": new_head.as_ref(), + "result": new_head.block, }, }); diff --git a/web3_proxy/src/block_number.rs b/web3_proxy/src/block_number.rs index da708286..4b92d1e7 100644 --- a/web3_proxy/src/block_number.rs +++ b/web3_proxy/src/block_number.rs @@ -80,12 +80,7 @@ pub async fn clean_block_number( .context("fetching block number from hash")?; // TODO: set change to true? i think not we should probably use hashes for everything. 
- ( - block - .number - .expect("blocks here should always have numbers"), - false, - ) + (*block.number(), false) } else { return Err(anyhow::anyhow!("blockHash missing")); } diff --git a/web3_proxy/src/config.rs b/web3_proxy/src/config.rs index 2bec1bd0..54456bb4 100644 --- a/web3_proxy/src/config.rs +++ b/web3_proxy/src/config.rs @@ -1,9 +1,9 @@ -use crate::rpcs::blockchain::BlockHashesCache; +use crate::app::AnyhowJoinHandle; +use crate::rpcs::blockchain::{BlockHashesCache, Web3ProxyBlock}; use crate::rpcs::one::Web3Rpc; -use crate::{app::AnyhowJoinHandle, rpcs::blockchain::ArcBlock}; use argh::FromArgs; use ethers::prelude::TxHash; -use ethers::types::U256; +use ethers::types::{U256, U64}; use hashbrown::HashMap; use log::warn; use migration::sea_orm::DatabaseConnection; @@ -11,7 +11,7 @@ use serde::Deserialize; use std::sync::Arc; use tokio::sync::broadcast; -pub type BlockAndRpc = (Option, Arc); +pub type BlockAndRpc = (Option, Arc); pub type TxHashAndRpc = (TxHash, Arc); #[derive(Debug, FromArgs)] @@ -105,6 +105,12 @@ pub struct AppConfig { pub invite_code: Option, pub login_domain: Option, + /// do not serve any requests if the best known block is older than this many seconds. + pub max_block_age: Option, + + /// do not serve any requests if the best known block is behind the best known block by more than this many blocks. + pub max_block_lag: Option, + /// Rate limit for bearer token authenticated entrypoints. /// This is separate from the rpc limits. #[serde(default = "default_bearer_token_max_concurrent_requests")] diff --git a/web3_proxy/src/rpcs/blockchain.rs b/web3_proxy/src/rpcs/blockchain.rs index b70663f1..b6bfd01e 100644 --- a/web3_proxy/src/rpcs/blockchain.rs +++ b/web3_proxy/src/rpcs/blockchain.rs @@ -1,16 +1,14 @@ +use super::consensus::ConsensusFinder; use super::many::Web3Rpcs; ///! Keep track of the blockchain as seen by a Web3Rpcs. use super::one::Web3Rpc; use super::transactions::TxStatus; use crate::frontend::authorization::Authorization; -use crate::{ - config::BlockAndRpc, jsonrpc::JsonRpcRequest, rpcs::synced_connections::ConsensusWeb3Rpcs, -}; +use crate::{config::BlockAndRpc, jsonrpc::JsonRpcRequest}; use anyhow::Context; use derive_more::From; use ethers::prelude::{Block, TxHash, H256, U64}; -use hashbrown::{HashMap, HashSet}; -use log::{debug, error, warn, Level}; +use log::{debug, error, trace, warn, Level}; use moka::future::Cache; use serde::Serialize; use serde_json::json; @@ -22,17 +20,18 @@ use tokio::time::Duration; // TODO: type for Hydrated Blocks with their full transactions? pub type ArcBlock = Arc>; -pub type BlockHashesCache = Cache; +pub type BlockHashesCache = Cache; /// A block and its age. 
#[derive(Clone, Debug, Default, From, Serialize)] -pub struct SavedBlock { +pub struct Web3ProxyBlock { pub block: ArcBlock, /// number of seconds this block was behind the current time when received - pub age: u64, + /// this is only set if the block is from a subscription + pub received_age: Option, } -impl PartialEq for SavedBlock { +impl PartialEq for Web3ProxyBlock { fn eq(&self, other: &Self) -> bool { match (self.block.hash, other.block.hash) { (None, None) => true, @@ -43,18 +42,23 @@ impl PartialEq for SavedBlock { } } -impl SavedBlock { +impl Web3ProxyBlock { + /// A new block has arrived over a subscription pub fn new(block: ArcBlock) -> Self { - let mut x = Self { block, age: 0 }; + let mut x = Self { + block, + received_age: None, + }; // no need to recalulate lag every time // if the head block gets too old, a health check restarts this connection - x.age = x.lag(); + // TODO: emit a stat for received_age + x.received_age = Some(x.age()); x } - pub fn lag(&self) -> u64 { + pub fn age(&self) -> u64 { let now = SystemTime::now() .duration_since(UNIX_EPOCH) .expect("there should always be time"); @@ -70,37 +74,58 @@ impl SavedBlock { } } - pub fn hash(&self) -> H256 { - self.block.hash.expect("saved blocks must have a hash") + #[inline(always)] + pub fn parent_hash(&self) -> &H256 { + &self.block.parent_hash } - // TODO: return as U64 or u64? - pub fn number(&self) -> U64 { - self.block.number.expect("saved blocks must have a number") + #[inline(always)] + pub fn hash(&self) -> &H256 { + self.block + .hash + .as_ref() + .expect("saved blocks must have a hash") + } + + #[inline(always)] + pub fn number(&self) -> &U64 { + self.block + .number + .as_ref() + .expect("saved blocks must have a number") } } -impl From for SavedBlock { +impl From for Web3ProxyBlock { fn from(x: ArcBlock) -> Self { - SavedBlock::new(x) + Web3ProxyBlock { + block: x, + received_age: None, + } } } -impl Display for SavedBlock { +impl Display for Web3ProxyBlock { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{} ({}, {}s old)", self.number(), self.hash(), self.age) + write!( + f, + "{} ({}, {}s old)", + self.number(), + self.hash(), + self.age() + ) } } impl Web3Rpcs { /// add a block to our mappings and track the heaviest chain - pub async fn save_block( + pub async fn try_cache_block( &self, - block: ArcBlock, + block: Web3ProxyBlock, heaviest_chain: bool, - ) -> anyhow::Result { + ) -> anyhow::Result { // TODO: i think we can rearrange this function to make it faster on the hot path - let block_hash = block.hash.as_ref().context("no block hash")?; + let block_hash = block.hash(); // skip Block::default() if block_hash.is_zero() { @@ -108,7 +133,7 @@ impl Web3Rpcs { return Ok(block); } - let block_num = block.number.as_ref().context("no block num")?; + let block_num = block.number(); // TODO: think more about heaviest_chain. would be better to do the check inside this function if heaviest_chain { @@ -136,7 +161,7 @@ impl Web3Rpcs { authorization: &Arc, hash: &H256, rpc: Option<&Arc>, - ) -> anyhow::Result { + ) -> anyhow::Result { // first, try to get the hash from our cache // the cache is set last, so if its here, its everywhere // TODO: use try_get_with @@ -147,17 +172,18 @@ impl Web3Rpcs { // block not in cache. we need to ask an rpc for it let get_block_params = (*hash, false); // TODO: if error, retry? 
- let block: ArcBlock = match rpc { + let block: Web3ProxyBlock = match rpc { Some(rpc) => rpc .wait_for_request_handle(authorization, Some(Duration::from_secs(30)), None) .await? - .request::<_, Option<_>>( + .request::<_, Option>( "eth_getBlockByHash", &json!(get_block_params), Level::Error.into(), None, ) .await? + .map(Into::into) .context("no block!")?, None => { // TODO: helper for method+params => JsonRpcRequest @@ -181,13 +207,14 @@ impl Web3Rpcs { let block: Option = serde_json::from_str(block.get())?; - block.context("no block!")? + // TODO: from isn't great here. received time is going to be weird + block.map(Into::into).context("no block!")? } }; // the block was fetched using eth_getBlockByHash, so it should have all fields // TODO: fill in heaviest_chain! if the block is old enough, is this definitely true? - let block = self.save_block(block, false).await?; + let block = self.try_cache_block(block, false).await?; Ok(block) } @@ -200,7 +227,7 @@ impl Web3Rpcs { ) -> anyhow::Result<(H256, u64)> { let (block, block_depth) = self.cannonical_block(authorization, num).await?; - let hash = block.hash.expect("Saved blocks should always have hashes"); + let hash = *block.hash(); Ok((hash, block_depth)) } @@ -211,7 +238,7 @@ impl Web3Rpcs { &self, authorization: &Arc, num: &U64, - ) -> anyhow::Result<(ArcBlock, u64)> { + ) -> anyhow::Result<(Web3ProxyBlock, u64)> { // we only have blocks by hash now // maybe save them during save_block in a blocks_by_number Cache> // if theres multiple, use petgraph to find the one on the main chain (and remove the others if they have enough confirmations) @@ -223,28 +250,21 @@ impl Web3Rpcs { .clone(); // be sure the requested block num exists - let mut head_block_num = consensus_head_receiver.borrow_and_update().number; + // TODO: is this okay? what if we aren't synced?! + let mut head_block_num = *consensus_head_receiver.borrow_and_update().number(); loop { - if let Some(head_block_num) = head_block_num { - if num <= &head_block_num { - break; - } + if num <= &head_block_num { + break; } + trace!("waiting for future block {} > {}", num, head_block_num); consensus_head_receiver.changed().await?; - head_block_num = consensus_head_receiver.borrow_and_update().number; + head_block_num = *consensus_head_receiver.borrow_and_update().number(); } - let head_block_num = - head_block_num.expect("we should only get here if we have a head block"); - - let block_depth = if num >= &head_block_num { - 0 - } else { - (head_block_num - num).as_u64() - }; + let block_depth = (head_block_num - num).as_u64(); // try to get the hash from our cache // deref to not keep the lock open @@ -276,8 +296,10 @@ impl Web3Rpcs { let block: ArcBlock = serde_json::from_str(raw_block.get())?; + let block = Web3ProxyBlock::from(block); + // the block was fetched using eth_getBlockByNumber, so it should have all fields and be on the heaviest chain - let block = self.save_block(block, true).await?; + let block = self.try_cache_block(block, true).await?; Ok((block, block_depth)) } @@ -288,18 +310,16 @@ impl Web3Rpcs { block_receiver: flume::Receiver, // TODO: document that this is a watch sender and not a broadcast! if things get busy, blocks might get missed // Geth's subscriptions have the same potential for skipping blocks. - head_block_sender: watch::Sender, + head_block_sender: watch::Sender, pending_tx_sender: Option>, ) -> anyhow::Result<()> { // TODO: indexmap or hashmap? what hasher? with_capacity? // TODO: this will grow unbounded. 
prune old heads on this at the same time we prune the graph? - let mut connection_heads = ConsensusFinder::default(); + let mut connection_heads = ConsensusFinder::new(self.max_block_age, self.max_block_lag); loop { match block_receiver.recv_async().await { Ok((new_block, rpc)) => { - let new_block = new_block.map(Into::into); - let rpc_name = rpc.name.clone(); if let Err(err) = self @@ -313,7 +333,7 @@ impl Web3Rpcs { ) .await { - warn!("unable to process block from rpc {}: {:?}", rpc_name, err); + warn!("unable to process block from rpc {}: {:#?}", rpc_name, err); } } Err(err) => { @@ -331,60 +351,72 @@ impl Web3Rpcs { &self, authorization: &Arc, consensus_finder: &mut ConsensusFinder, - rpc_head_block: Option, + rpc_head_block: Option, rpc: Arc, - head_block_sender: &watch::Sender, + head_block_sender: &watch::Sender, pending_tx_sender: &Option>, ) -> anyhow::Result<()> { // TODO: how should we handle an error here? if !consensus_finder .update_rpc(rpc_head_block.clone(), rpc.clone(), self) - .await? + .await + .context("failed to update rpc")? { - // nothing changed. no need + // nothing changed. no need to scan for a new consensus head return Ok(()); } let new_synced_connections = consensus_finder .best_consensus_connections(authorization, self) - .await; + .await + .context("no consensus head block!") + .map_err(|err| { + self.watch_consensus_rpcs_sender + .send_replace(Arc::new(Default::default())); + + err + })?; // TODO: what should we do if the block number of new_synced_connections is < old_synced_connections? wait? - let includes_backups = new_synced_connections.includes_backups; + let backups_needed = new_synced_connections.backups_needed; let consensus_head_block = new_synced_connections.head_block.clone(); let num_consensus_rpcs = new_synced_connections.num_conns(); - let num_checked_rpcs = new_synced_connections.num_checked_conns; - let num_active_rpcs = consensus_finder.all.rpc_name_to_hash.len(); + let num_checked_rpcs = 0; // TODO: figure this out + let num_active_rpcs = consensus_finder + .all_rpcs_group() + .map(|x| x.len()) + .unwrap_or_default(); let total_rpcs = self.conns.len(); let old_consensus_head_connections = self - .watch_consensus_connections_sender + .watch_consensus_rpcs_sender .send_replace(Arc::new(new_synced_connections)); - let includes_backups_str = if includes_backups { "B " } else { "" }; + let backups_voted_str = if backups_needed { "B " } else { "" }; - if let Some(consensus_saved_block) = consensus_head_block { + if let Some(consensus_head_block) = consensus_head_block { match &old_consensus_head_connections.head_block { None => { debug!( "first {}{}/{}/{}/{} block={}, rpc={}", - includes_backups_str, + backups_voted_str, num_consensus_rpcs, num_checked_rpcs, num_active_rpcs, total_rpcs, - consensus_saved_block, + consensus_head_block, rpc, ); - if includes_backups { + if backups_needed { // TODO: what else should be in this error? warn!("Backup RPCs are in use!"); } + // this should already be cached let consensus_head_block = - self.save_block(consensus_saved_block.block, true).await?; + self.try_cache_block(consensus_head_block, true).await?; head_block_sender .send(consensus_head_block) @@ -396,46 +428,45 @@ impl Web3Rpcs { .map(|x| x.to_string()) .unwrap_or_else(|| "None".to_string()); - match consensus_saved_block.number().cmp(&old_head_block.number()) { + match consensus_head_block.number().cmp(&old_head_block.number()) { Ordering::Equal => { // multiple blocks with the same fork! 
- if consensus_saved_block.hash() == old_head_block.hash() { + if consensus_head_block.hash() == old_head_block.hash() { // no change in hash. no need to use head_block_sender // TODO: trace level if rpc is backup debug!( "con {}{}/{}/{}/{} con={} rpc={}@{}", - includes_backups_str, + backups_voted_str, num_consensus_rpcs, num_checked_rpcs, num_active_rpcs, total_rpcs, - consensus_saved_block, + consensus_head_block, rpc, rpc_head_str, ) } else { // hash changed - - if includes_backups { + if backups_needed { // TODO: what else should be in this error? warn!("Backup RPCs are in use!"); } debug!( "unc {}{}/{}/{}/{} con_head={} old={} rpc={}@{}", - includes_backups_str, + backups_voted_str, num_consensus_rpcs, num_checked_rpcs, num_active_rpcs, total_rpcs, - consensus_saved_block, + consensus_head_block, old_head_block, rpc, rpc_head_str, ); let consensus_head_block = self - .save_block(consensus_saved_block.block, true) + .try_cache_block(consensus_head_block, true) .await .context("save consensus_head_block as heaviest chain")?; @@ -449,25 +480,25 @@ impl Web3Rpcs { // TODO: better log warn!( "chain rolled back {}{}/{}/{}/{} con={} old={} rpc={}@{}", - includes_backups_str, + backups_voted_str, num_consensus_rpcs, num_checked_rpcs, num_active_rpcs, total_rpcs, - consensus_saved_block, + consensus_head_block, old_head_block, rpc, rpc_head_str, ); - if includes_backups { + if backups_needed { // TODO: what else should be in this error? warn!("Backup RPCs are in use!"); } // TODO: tell save_block to remove any higher block numbers from the cache. not needed because we have other checks on requested blocks being > head, but still seems like a good idea let consensus_head_block = self - .save_block(consensus_saved_block.block, true) + .try_cache_block(consensus_head_block, true) .await .context( "save_block sending consensus_head_block as heaviest chain", @@ -480,23 +511,23 @@ impl Web3Rpcs { Ordering::Greater => { debug!( "new {}{}/{}/{}/{} con={} rpc={}@{}", - includes_backups_str, + backups_voted_str, num_consensus_rpcs, num_checked_rpcs, num_active_rpcs, total_rpcs, - consensus_saved_block, + consensus_head_block, rpc, rpc_head_str, ); - if includes_backups { + if backups_needed { // TODO: what else should be in this error? 
warn!("Backup RPCs are in use!"); } let consensus_head_block = - self.save_block(consensus_saved_block.block, true).await?; + self.try_cache_block(consensus_head_block, true).await?; head_block_sender.send(consensus_head_block)?; } @@ -512,7 +543,7 @@ impl Web3Rpcs { if num_checked_rpcs >= self.min_head_rpcs { error!( "non {}{}/{}/{}/{} rpc={}@{}", - includes_backups_str, + backups_voted_str, num_consensus_rpcs, num_checked_rpcs, num_active_rpcs, @@ -523,7 +554,7 @@ impl Web3Rpcs { } else { debug!( "non {}{}/{}/{}/{} rpc={}@{}", - includes_backups_str, + backups_voted_str, num_consensus_rpcs, num_checked_rpcs, num_active_rpcs, @@ -537,403 +568,3 @@ impl Web3Rpcs { Ok(()) } } - -struct ConnectionsGroup { - /// TODO: this group might not actually include backups, but they were at leastchecked - includes_backups: bool, - rpc_name_to_hash: HashMap, -} - -impl ConnectionsGroup { - fn new(with_backups: bool) -> Self { - Self { - includes_backups: with_backups, - rpc_name_to_hash: Default::default(), - } - } - - fn without_backups() -> Self { - Self::new(false) - } - - fn with_backups() -> Self { - Self::new(true) - } - - fn remove(&mut self, rpc: &Web3Rpc) -> Option { - self.rpc_name_to_hash.remove(rpc.name.as_str()) - } - - fn insert(&mut self, rpc: &Web3Rpc, block_hash: H256) -> Option { - self.rpc_name_to_hash.insert(rpc.name.clone(), block_hash) - } - - // TODO: i don't love having this here. move to web3_connections? - async fn get_block_from_rpc( - &self, - rpc_name: &str, - hash: &H256, - authorization: &Arc, - web3_rpcs: &Web3Rpcs, - ) -> anyhow::Result { - // // TODO: why does this happen?!?! seems to only happen with uncled blocks - // // TODO: maybe we should do try_get_with? - // // TODO: maybe we should just continue. this only seems to happen when an older block is received - // warn!( - // "Missing connection_head_block in block_hashes. Fetching now. hash={}. other={}", - // connection_head_hash, conn_name - // ); - - // this option should almost always be populated. if the connection reconnects at a bad time it might not be available though - // TODO: if this is None, I think we should error. - let rpc = web3_rpcs.conns.get(rpc_name); - - web3_rpcs.block(authorization, hash, rpc).await - } - - // TODO: do this during insert/remove? - pub(self) async fn highest_block( - &self, - authorization: &Arc, - web3_rpcs: &Web3Rpcs, - ) -> Option { - let mut checked_heads = HashSet::with_capacity(self.rpc_name_to_hash.len()); - let mut highest_block = None::; - - for (rpc_name, rpc_head_hash) in self.rpc_name_to_hash.iter() { - // don't waste time checking the same hash multiple times - if checked_heads.contains(rpc_head_hash) { - continue; - } - - let rpc_block = match self - .get_block_from_rpc(rpc_name, rpc_head_hash, authorization, web3_rpcs) - .await - { - Ok(x) => x, - Err(err) => { - warn!( - "failed getting block {} from {} while finding highest block number: {:?}", - rpc_head_hash, rpc_name, err, - ); - continue; - } - }; - - checked_heads.insert(rpc_head_hash); - - // if this is the first block we've tried - // or if this rpc's newest block has a higher number - // we used to check total difficulty, but that isn't a thing anymore on ETH - // TODO: we still need total difficulty on some other PoW chains. whats annoying is it isn't considered part of the "block header" just the block. 
so websockets don't return it - let highest_num = highest_block - .as_ref() - .map(|x| x.number.expect("blocks here should always have a number")); - let rpc_num = rpc_block.as_ref().number; - - if rpc_num > highest_num { - highest_block = Some(rpc_block); - } - } - - highest_block - } - - pub(self) async fn consensus_head_connections( - &self, - authorization: &Arc, - web3_rpcs: &Web3Rpcs, - ) -> anyhow::Result { - let mut maybe_head_block = match self.highest_block(authorization, web3_rpcs).await { - None => return Err(anyhow::anyhow!("No blocks known")), - Some(x) => x, - }; - - let num_known = self.rpc_name_to_hash.len(); - - // track rpcs on this heaviest chain so we can build a new ConsensusConnections - let mut highest_rpcs = HashSet::<&str>::new(); - // a running total of the soft limits covered by the rpcs that agree on the head block - let mut highest_rpcs_sum_soft_limit: u32 = 0; - // TODO: also track highest_rpcs_sum_hard_limit? llama doesn't need this, so it can wait - - // check the highest work block for a set of rpcs that can serve our request load - // if it doesn't have enough rpcs for our request load, check the parent block - // TODO: loop for how many parent blocks? we don't want to serve blocks that are too far behind. probably different per chain - // TODO: this loop is pretty long. any way to clean up this code? - for _ in 0..6 { - let maybe_head_hash = maybe_head_block - .hash - .as_ref() - .expect("blocks here always need hashes"); - - // find all rpcs with maybe_head_block as their current head - for (rpc_name, rpc_head_hash) in self.rpc_name_to_hash.iter() { - if rpc_head_hash != maybe_head_hash { - // connection is not on the desired block - continue; - } - if highest_rpcs.contains(rpc_name.as_str()) { - // connection is on a child block - continue; - } - - if let Some(rpc) = web3_rpcs.conns.get(rpc_name.as_str()) { - highest_rpcs.insert(rpc_name); - highest_rpcs_sum_soft_limit += rpc.soft_limit; - } else { - // i don't think this is an error. i think its just if a reconnect is currently happening - warn!("connection missing: {}", rpc_name); - debug!("web3_rpcs.conns: {:#?}", web3_rpcs.conns); - } - } - - if highest_rpcs_sum_soft_limit >= web3_rpcs.min_sum_soft_limit - && highest_rpcs.len() >= web3_rpcs.min_head_rpcs - { - // we have enough servers with enough requests - break; - } - - // not enough rpcs yet. check the parent block - if let Some(parent_block) = web3_rpcs.block_hashes.get(&maybe_head_block.parent_hash) { - // trace!( - // child=%maybe_head_hash, parent=%parent_block.hash.unwrap(), "avoiding thundering herd", - // ); - - maybe_head_block = parent_block; - continue; - } else { - if num_known < web3_rpcs.min_head_rpcs { - return Err(anyhow::anyhow!( - "not enough rpcs connected: {}/{}/{}", - highest_rpcs.len(), - num_known, - web3_rpcs.min_head_rpcs, - )); - } else { - let soft_limit_percent = (highest_rpcs_sum_soft_limit as f32 - / web3_rpcs.min_sum_soft_limit as f32) - * 100.0; - - return Err(anyhow::anyhow!( - "ran out of parents to check. rpcs {}/{}/{}. soft limit: {:.2}% ({}/{})", - highest_rpcs.len(), - num_known, - web3_rpcs.min_head_rpcs, - highest_rpcs_sum_soft_limit, - web3_rpcs.min_sum_soft_limit, - soft_limit_percent, - )); - } - } - } - - // TODO: if consensus_head_rpcs.is_empty, try another method of finding the head block. will need to change the return Err above into breaks. 
- - // we've done all the searching for the heaviest block that we can - if highest_rpcs.len() < web3_rpcs.min_head_rpcs - || highest_rpcs_sum_soft_limit < web3_rpcs.min_sum_soft_limit - { - // if we get here, not enough servers are synced. return an error - let soft_limit_percent = - (highest_rpcs_sum_soft_limit as f32 / web3_rpcs.min_sum_soft_limit as f32) * 100.0; - - return Err(anyhow::anyhow!( - "Not enough resources. rpcs {}/{}/{}. soft limit: {:.2}% ({}/{})", - highest_rpcs.len(), - num_known, - web3_rpcs.min_head_rpcs, - highest_rpcs_sum_soft_limit, - web3_rpcs.min_sum_soft_limit, - soft_limit_percent, - )); - } - - // success! this block has enough soft limit and nodes on it (or on later blocks) - let conns: Vec> = highest_rpcs - .into_iter() - .filter_map(|conn_name| web3_rpcs.conns.get(conn_name).cloned()) - .collect(); - - // TODO: DEBUG only check - let _ = maybe_head_block - .hash - .expect("head blocks always have hashes"); - let _ = maybe_head_block - .number - .expect("head blocks always have numbers"); - - let consensus_head_block: SavedBlock = maybe_head_block.into(); - - Ok(ConsensusWeb3Rpcs { - head_block: Some(consensus_head_block), - conns, - num_checked_conns: self.rpc_name_to_hash.len(), - includes_backups: self.includes_backups, - }) - } -} - -/// A ConsensusConnections builder that tracks all connection heads across multiple groups of servers -pub struct ConsensusFinder { - /// only main servers - main: ConnectionsGroup, - /// main and backup servers - all: ConnectionsGroup, -} - -impl Default for ConsensusFinder { - fn default() -> Self { - Self { - main: ConnectionsGroup::without_backups(), - all: ConnectionsGroup::with_backups(), - } - } -} - -impl ConsensusFinder { - fn remove(&mut self, rpc: &Web3Rpc) -> Option { - // TODO: should we have multiple backup tiers? (remote datacenters vs third party) - if !rpc.backup { - self.main.remove(rpc); - } - self.all.remove(rpc) - } - - fn insert(&mut self, rpc: &Web3Rpc, new_hash: H256) -> Option { - // TODO: should we have multiple backup tiers? (remote datacenters vs third party) - if !rpc.backup { - self.main.insert(rpc, new_hash); - } - self.all.insert(rpc, new_hash) - } - - /// Update our tracking of the rpc and return true if something changed - async fn update_rpc( - &mut self, - rpc_head_block: Option, - rpc: Arc, - // we need this so we can save the block to caches. i don't like it though. maybe we should use a lazy_static Cache wrapper that has a "save_block" method?. i generally dislike globals but i also dislike all the types having to pass eachother around - web3_connections: &Web3Rpcs, - ) -> anyhow::Result { - // add the rpc's block to connection_heads, or remove the rpc from connection_heads - let changed = match rpc_head_block { - Some(mut rpc_head_block) => { - // we don't know if its on the heaviest chain yet - rpc_head_block.block = web3_connections - .save_block(rpc_head_block.block, false) - .await?; - - // we used to remove here if the block was too far behind. but it just made things more complicated - - let rpc_head_hash = rpc_head_block.hash(); - - if let Some(prev_hash) = self.insert(&rpc, rpc_head_hash) { - if prev_hash == rpc_head_hash { - // this block was already sent by this rpc. 
return early - false - } else { - // new block for this rpc - true - } - } else { - // first block for this rpc - true - } - } - None => { - if self.remove(&rpc).is_none() { - // this rpc was already removed - false - } else { - // rpc head changed from being synced to not - true - } - } - }; - - Ok(changed) - } - - // TODO: this could definitely be cleaner. i don't like the error handling/unwrapping - async fn best_consensus_connections( - &mut self, - authorization: &Arc, - web3_connections: &Web3Rpcs, - ) -> ConsensusWeb3Rpcs { - let highest_block_num = match self - .all - .highest_block(authorization, web3_connections) - .await - { - None => { - return ConsensusWeb3Rpcs::default(); - } - Some(x) => x.number.expect("blocks here should always have a number"), - }; - - // TODO: also needs to be not less than our current head - let mut min_block_num = highest_block_num.saturating_sub(U64::from(5)); - - // we also want to be sure we don't ever go backwards! - if let Some(current_consensus_head_num) = web3_connections.head_block_num() { - min_block_num = min_block_num.max(current_consensus_head_num); - } - - // TODO: pass `min_block_num` to consensus_head_connections? - let consensus_head_for_main = self - .main - .consensus_head_connections(authorization, web3_connections) - .await - .map_err(|err| err.context("cannot use main group")); - - let consensus_num_for_main = consensus_head_for_main - .as_ref() - .ok() - .map(|x| x.head_block.as_ref().unwrap().number()); - - if let Some(consensus_num_for_main) = consensus_num_for_main { - if consensus_num_for_main >= min_block_num { - return consensus_head_for_main.unwrap(); - } - } - - // TODO: pass `min_block_num` to consensus_head_connections? - let consensus_connections_for_all = match self - .all - .consensus_head_connections(authorization, web3_connections) - .await - { - Err(err) => { - if self.all.rpc_name_to_hash.len() < web3_connections.min_head_rpcs { - debug!("No consensus head yet: {}", err); - } - return ConsensusWeb3Rpcs::default(); - } - Ok(x) => x, - }; - - let consensus_num_for_all = consensus_connections_for_all - .head_block - .as_ref() - .map(|x| x.number()); - - if consensus_num_for_all > consensus_num_for_main { - if consensus_num_for_all < Some(min_block_num) { - // TODO: this should have an alarm in sentry - error!("CONSENSUS HEAD w/ BACKUP NODES IS VERY OLD!"); - } - consensus_connections_for_all - } else { - if let Ok(x) = consensus_head_for_main { - error!("CONSENSUS HEAD IS VERY OLD! Backup RPCs did not improve this situation"); - x - } else { - // TODO: i don't think we need this error. and i doublt we'll ever even get here - error!("NO CONSENSUS HEAD!"); - ConsensusWeb3Rpcs::default() - } - } - } -} diff --git a/web3_proxy/src/rpcs/consensus.rs b/web3_proxy/src/rpcs/consensus.rs new file mode 100644 index 00000000..289be536 --- /dev/null +++ b/web3_proxy/src/rpcs/consensus.rs @@ -0,0 +1,527 @@ +use crate::frontend::authorization::Authorization; + +use super::blockchain::Web3ProxyBlock; +use super::many::Web3Rpcs; +use super::one::Web3Rpc; +use ethers::prelude::{H256, U64}; +use hashbrown::{HashMap, HashSet}; +use log::{debug, trace, warn}; +use serde::Serialize; +use std::collections::BTreeMap; +use std::fmt; +use std::sync::Arc; + +/// A collection of Web3Rpcs that are on the same block. 
+/// Serialize is so we can print it on our debug endpoint +#[derive(Clone, Default, Serialize)] +pub struct ConsensusWeb3Rpcs { + pub(super) head_block: Option, + // TODO: this should be able to serialize, but it isn't + #[serde(skip_serializing)] + pub(super) conns: Vec>, + pub(super) backups_voted: Option, + pub(super) backups_needed: bool, +} + +impl ConsensusWeb3Rpcs { + pub fn num_conns(&self) -> usize { + self.conns.len() + } + + pub fn sum_soft_limit(&self) -> u32 { + self.conns.iter().fold(0, |sum, rpc| sum + rpc.soft_limit) + } + + // TODO: sum_hard_limit? +} + +impl fmt::Debug for ConsensusWeb3Rpcs { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // TODO: the default formatter takes forever to write. this is too quiet though + // TODO: print the actual conns? + f.debug_struct("ConsensusConnections") + .field("head_block", &self.head_block) + .field("num_conns", &self.conns.len()) + .finish_non_exhaustive() + } +} + +impl Web3Rpcs { + // TODO: return a ref? + pub fn head_block(&self) -> Option { + self.watch_consensus_head_receiver + .as_ref() + .map(|x| x.borrow().clone()) + } + + // TODO: return a ref? + pub fn head_block_hash(&self) -> Option { + self.head_block().map(|x| *x.hash()) + } + + // TODO: return a ref? + pub fn head_block_num(&self) -> Option { + self.head_block().map(|x| *x.number()) + } + + pub fn synced(&self) -> bool { + !self.watch_consensus_rpcs_sender.borrow().conns.is_empty() + } + + pub fn num_synced_rpcs(&self) -> usize { + self.watch_consensus_rpcs_sender.borrow().conns.len() + } +} + +pub struct ConnectionsGroup { + rpc_name_to_block: HashMap, + // TODO: what if there are two blocks with the same number? + highest_block: Option, +} + +impl Default for ConnectionsGroup { + fn default() -> Self { + Self { + rpc_name_to_block: Default::default(), + highest_block: Default::default(), + } + } +} + +impl ConnectionsGroup { + pub fn len(&self) -> usize { + self.rpc_name_to_block.len() + } + + fn remove(&mut self, rpc_name: &str) -> Option { + if let Some(removed_block) = self.rpc_name_to_block.remove(rpc_name) { + match self.highest_block.as_mut() { + None => {} + Some(current_highest_block) => { + if removed_block.hash() == current_highest_block.hash() { + for maybe_highest_block in self.rpc_name_to_block.values() { + if maybe_highest_block.number() > current_highest_block.number() { + *current_highest_block = maybe_highest_block.clone(); + }; + } + } + } + } + + Some(removed_block) + } else { + None + } + } + + fn insert(&mut self, rpc: &Web3Rpc, block: Web3ProxyBlock) -> Option { + // TODO: what about a reorg to the same height? + if Some(block.number()) > self.highest_block.as_ref().map(|x| x.number()) { + self.highest_block = Some(block.clone()); + } + + self.rpc_name_to_block.insert(rpc.name.clone(), block) + } + + // // TODO: do this during insert/remove? 
+ // pub(self) async fn highest_block( + // &self, + // authorization: &Arc, + // web3_rpcs: &Web3Rpcs, + // ) -> Option { + // let mut checked_heads = HashSet::with_capacity(self.rpc_name_to_hash.len()); + // let mut highest_block = None::; + + // for (rpc_name, rpc_head_hash) in self.rpc_name_to_hash.iter() { + // // don't waste time checking the same hash multiple times + // if checked_heads.contains(rpc_head_hash) { + // continue; + // } + + // let rpc_block = match web3_rpcs + // .get_block_from_rpc(rpc_name, rpc_head_hash, authorization) + // .await + // { + // Ok(x) => x, + // Err(err) => { + // warn!( + // "failed getting block {} from {} while finding highest block number: {:?}", + // rpc_head_hash, rpc_name, err, + // ); + // continue; + // } + // }; + + // checked_heads.insert(rpc_head_hash); + + // // if this is the first block we've tried + // // or if this rpc's newest block has a higher number + // // we used to check total difficulty, but that isn't a thing anymore on ETH + // // TODO: we still need total difficulty on some other PoW chains. whats annoying is it isn't considered part of the "block header" just the block. so websockets don't return it + // let highest_num = highest_block + // .as_ref() + // .map(|x| x.number.expect("blocks here should always have a number")); + // let rpc_num = rpc_block.as_ref().number; + + // if rpc_num > highest_num { + // highest_block = Some(rpc_block); + // } + // } + + // highest_block + // } + + /// min_consensus_block_num keeps us from ever going backwards. + /// TODO: think about min_consensus_block_num more. i think this might cause an outage if the chain is doing weird things. but 503s is probably better than broken data. + pub(self) async fn consensus_head_connections( + &self, + authorization: &Arc, + web3_rpcs: &Web3Rpcs, + min_consensus_block_num: Option, + ) -> anyhow::Result { + let mut maybe_head_block = match self.highest_block.clone() { + None => return Err(anyhow::anyhow!("no blocks known")), + Some(x) => x, + }; + + // TODO: take max_distance_consensus_to_highest as an argument? + // TODO: what if someone's backup node is misconfigured and goes on a really fast forked chain? + let max_lag_consensus_to_highest = + if let Some(min_consensus_block_num) = min_consensus_block_num { + maybe_head_block + .number() + .saturating_sub(min_consensus_block_num) + .as_u64() + } else { + // TODO: get from app config? different chains probably should have different values. 10 is probably too much + 10 + }; + + let num_known = self.rpc_name_to_block.len(); + + if num_known < web3_rpcs.min_head_rpcs { + return Err(anyhow::anyhow!( + "not enough rpcs connected: {}/{}", + num_known, + web3_rpcs.min_head_rpcs, + )); + } + + let mut primary_rpcs_voted: Option = None; + let mut backup_rpcs_voted: Option = None; + + // track rpcs on this heaviest chain so we can build a new ConsensusConnections + let mut primary_consensus_rpcs = HashSet::<&str>::new(); + let mut backup_consensus_rpcs = HashSet::<&str>::new(); + + // a running total of the soft limits covered by the rpcs that agree on the head block + let mut primary_sum_soft_limit: u32 = 0; + let mut backup_sum_soft_limit: u32 = 0; + + // TODO: also track the sum of *available* hard_limits. if any servers have no hard limits, use their soft limit or no limit? + + // check the highest work block for a set of rpcs that can serve our request load + // if it doesn't have enough rpcs for our request load, check the parent block + // TODO: loop for how many parent blocks? 
we don't want to serve blocks that are too far behind. probably different per chain + // TODO: this loop is pretty long. any way to clean up this code? + for _ in 0..max_lag_consensus_to_highest { + let maybe_head_hash = maybe_head_block.hash(); + + // find all rpcs with maybe_head_hash as their current head + for (rpc_name, rpc_head) in self.rpc_name_to_block.iter() { + if rpc_head.hash() != maybe_head_hash { + // connection is not on the desired block + continue; + } + if backup_consensus_rpcs.contains(rpc_name.as_str()) { + // connection is on a later block in this same chain + continue; + } + if primary_consensus_rpcs.contains(rpc_name.as_str()) { + // connection is on a later block in this same chain + continue; + } + + if let Some(rpc) = web3_rpcs.conns.get(rpc_name.as_str()) { + if backup_rpcs_voted.is_some() { + // backups already voted for a head block. don't change it + } else { + backup_consensus_rpcs.insert(rpc_name); + backup_sum_soft_limit += rpc.soft_limit; + } + if !rpc.backup { + primary_consensus_rpcs.insert(rpc_name); + primary_sum_soft_limit += rpc.soft_limit; + } + } else { + // i don't think this is an error. i think its just if a reconnect is currently happening + warn!("connection missing: {}", rpc_name); + debug!("web3_rpcs.conns: {:#?}", web3_rpcs.conns); + } + } + + if primary_sum_soft_limit >= web3_rpcs.min_sum_soft_limit + && primary_consensus_rpcs.len() >= web3_rpcs.min_head_rpcs + { + // we have enough servers with enough requests! yey! + primary_rpcs_voted = Some(maybe_head_block.clone()); + break; + } + + if backup_rpcs_voted.is_none() + && backup_consensus_rpcs != primary_consensus_rpcs + && backup_sum_soft_limit >= web3_rpcs.min_sum_soft_limit + && backup_consensus_rpcs.len() >= web3_rpcs.min_head_rpcs + { + // if we include backup servers, we have enough servers with high enough limits + backup_rpcs_voted = Some(maybe_head_block.clone()); + } + + // not enough rpcs on this block. check the parent block + match web3_rpcs + .block(authorization, &maybe_head_block.parent_hash(), None) + .await + { + Ok(parent_block) => { + // trace!( + // child=%maybe_head_hash, parent=%parent_block.hash.unwrap(), "avoiding thundering herd. checking consensus on parent block", + // ); + maybe_head_block = parent_block.into(); + continue; + } + Err(err) => { + let soft_limit_percent = (primary_sum_soft_limit as f32 + / web3_rpcs.min_sum_soft_limit as f32) + * 100.0; + + let err_msg = format!("ran out of parents to check. rpcs {}/{}/{}. soft limit: {:.2}% ({}/{}). err: {:#?}", + primary_consensus_rpcs.len(), + num_known, + web3_rpcs.min_head_rpcs, + primary_sum_soft_limit, + web3_rpcs.min_sum_soft_limit, + soft_limit_percent, + err, + ); + + if backup_rpcs_voted.is_some() { + warn!("{}", err_msg); + break; + } else { + return Err(anyhow::anyhow!(err_msg)); + } + } + } + } + + // TODO: if consensus_head_rpcs.is_empty, try another method of finding the head block. will need to change the return Err above into breaks. + + // we've done all the searching for the heaviest block that we can + if (primary_consensus_rpcs.len() < web3_rpcs.min_head_rpcs + || primary_sum_soft_limit < web3_rpcs.min_sum_soft_limit) + && backup_rpcs_voted.is_none() + { + // if we get here, not enough servers are synced. return an error + let soft_limit_percent = + (primary_sum_soft_limit as f32 / web3_rpcs.min_sum_soft_limit as f32) * 100.0; + + return Err(anyhow::anyhow!( + "Not enough resources. rpcs {}/{}/{}. 
soft limit: {:.2}% ({}/{})", + primary_consensus_rpcs.len(), + num_known, + web3_rpcs.min_head_rpcs, + primary_sum_soft_limit, + web3_rpcs.min_sum_soft_limit, + soft_limit_percent, + )); + } + + // success! this block has enough soft limit and nodes on it (or on later blocks) + let conns: Vec> = primary_consensus_rpcs + .into_iter() + .filter_map(|conn_name| web3_rpcs.conns.get(conn_name).cloned()) + .collect(); + + #[cfg(debug_assertions)] + let _ = maybe_head_block.hash(); + #[cfg(debug_assertions)] + let _ = maybe_head_block.number(); + + Ok(ConsensusWeb3Rpcs { + head_block: Some(maybe_head_block), + conns, + backups_voted: backup_rpcs_voted, + backups_needed: primary_rpcs_voted.is_none(), + }) + } +} + +/// A ConsensusConnections builder that tracks all connection heads across multiple groups of servers +pub struct ConsensusFinder { + /// backups for all tiers are only used if necessary + /// tiers[0] = only tier 0. + /// tiers[1] = tier 0 and tier 1 + /// tiers[n] = tier 0..=n + /// This is a BTreeMap and not a Vec because sometimes a tier is empty + tiers: BTreeMap, + /// never serve blocks that are too old + max_block_age: Option, + /// tier 0 will be prefered as long as the distance between it and the other tiers is <= max_tier_lag + max_block_lag: Option, +} + +impl ConsensusFinder { + pub fn new(max_block_age: Option, max_block_lag: Option) -> Self { + Self { + tiers: Default::default(), + max_block_age, + max_block_lag, + } + } +} + +impl ConsensusFinder { + /// get the ConnectionsGroup that contains all rpcs + /// panics if there are no tiers + pub fn all_rpcs_group(&self) -> Option<&ConnectionsGroup> { + self.tiers.values().last() + } + + /// get the mutable ConnectionsGroup that contains all rpcs + pub fn all_mut(&mut self) -> Option<&mut ConnectionsGroup> { + self.tiers.values_mut().last() + } + + pub fn remove(&mut self, rpc: &Web3Rpc) -> Option { + let mut removed = None; + + for (i, tier_group) in self.tiers.iter_mut().rev() { + if i < &rpc.tier { + break; + } + let x = tier_group.remove(rpc.name.as_str()); + + if removed.is_none() && x.is_some() { + removed = x; + } + } + + removed + } + + /// returns the block that the rpc was on before updating to the new_block + pub fn insert(&mut self, rpc: &Web3Rpc, new_block: Web3ProxyBlock) -> Option { + let mut old = None; + + for (i, tier_group) in self.tiers.iter_mut().rev() { + if i > &rpc.tier { + break; + } + + // TODO: should new_block be a ref? + let x = tier_group.insert(rpc, new_block.clone()); + + if old.is_none() && x.is_some() { + old = x; + } + } + + old + } + + /// Update our tracking of the rpc and return true if something changed + pub(crate) async fn update_rpc( + &mut self, + rpc_head_block: Option, + rpc: Arc, + // we need this so we can save the block to caches. i don't like it though. maybe we should use a lazy_static Cache wrapper that has a "save_block" method?. i generally dislike globals but i also dislike all the types having to pass eachother around + web3_connections: &Web3Rpcs, + ) -> anyhow::Result { + // add the rpc's block to connection_heads, or remove the rpc from connection_heads + let changed = match rpc_head_block { + Some(mut rpc_head_block) => { + // we don't know if its on the heaviest chain yet + rpc_head_block = web3_connections + .try_cache_block(rpc_head_block, false) + .await?; + + // if let Some(max_block_lag) = max_block_lag { + // if rpc_head_block.number() < ??? { + // trace!("rpc_head_block from {} is too far behind! 
{}", rpc, rpc_head_block); + // return Ok(self.remove(&rpc).is_some()); + // } + // } + + if let Some(max_age) = self.max_block_age { + if rpc_head_block.age() > max_age { + trace!("rpc_head_block from {} is too old! {}", rpc, rpc_head_block); + return Ok(self.remove(&rpc).is_some()); + } + } + + if let Some(prev_block) = self.insert(&rpc, rpc_head_block.clone()) { + if prev_block.hash() == rpc_head_block.hash() { + // this block was already sent by this rpc. return early + false + } else { + // new block for this rpc + true + } + } else { + // first block for this rpc + true + } + } + None => { + if self.remove(&rpc).is_none() { + // this rpc was already removed + false + } else { + // rpc head changed from being synced to not + true + } + } + }; + + Ok(changed) + } + + // TODO: this could definitely be cleaner. i don't like the error handling/unwrapping + pub async fn best_consensus_connections( + &mut self, + authorization: &Arc, + web3_connections: &Web3Rpcs, + ) -> Option { + // TODO: attach context to these? + let highest_known_block = self.all_rpcs_group()?.highest_block.as_ref()?; + + trace!("highest_known_block: {}", highest_known_block); + + let min_block_num = self + .max_block_lag + .map(|x| highest_known_block.number().saturating_sub(x)) + // we also want to be sure we don't ever go backwards! + .max(web3_connections.head_block_num()); + + trace!("min_block_num: {:#?}", min_block_num); + + // TODO Should this be a Vec>>? + // TODO: how should errors be handled? + // TODO: find the best tier with a connectionsgroup. best case, this only queries the first tier + // TODO: do we need to calculate all of them? I think having highest_known_block included as part of min_block_num should make that unnecessary + for (i, x) in self.tiers.iter() { + trace!("checking tier {}", i); + if let Ok(consensus_head_connections) = x + .consensus_head_connections(authorization, web3_connections, min_block_num) + .await + { + trace!("success on tier {}", i); + // we got one! hopefully it didn't need to use any backups. + // but even if it did need backup servers, that is better than going to a worse tier + return Some(consensus_head_connections); + } + } + + return None; + } +} diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index 83eb3922..e5293d2d 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -1,8 +1,8 @@ ///! Load balanced communication with a group of web3 rpc providers -use super::blockchain::{ArcBlock, BlockHashesCache}; +use super::blockchain::{BlockHashesCache, Web3ProxyBlock}; +use super::consensus::ConsensusWeb3Rpcs; use super::one::Web3Rpc; use super::request::{OpenRequestHandle, OpenRequestResult, RequestRevertHandler}; -use super::synced_connections::ConsensusWeb3Rpcs; use crate::app::{flatten_handle, AnyhowJoinHandle}; use crate::config::{BlockAndRpc, TxHashAndRpc, Web3RpcConfig}; use crate::frontend::authorization::{Authorization, RequestMetadata}; @@ -38,9 +38,9 @@ pub struct Web3Rpcs { /// any requests will be forwarded to one (or more) of these connections pub(crate) conns: HashMap>, /// all providers with the same consensus head block. 
won't update if there is no `self.watch_consensus_head_sender` - pub(super) watch_consensus_connections_sender: watch::Sender>, + pub(super) watch_consensus_rpcs_sender: watch::Sender>, /// this head receiver makes it easy to wait until there is a new block - pub(super) watch_consensus_head_receiver: Option>, + pub(super) watch_consensus_head_receiver: Option>, pub(super) pending_transactions: Cache, /// TODO: this map is going to grow forever unless we do some sort of pruning. maybe store pruned in redis? @@ -48,8 +48,14 @@ pub struct Web3Rpcs { pub(super) block_hashes: BlockHashesCache, /// blocks on the heaviest chain pub(super) block_numbers: Cache, + /// the number of rpcs required to agree on consensus for the head block (thundering herd protection) pub(super) min_head_rpcs: usize, + /// the soft limit required to agree on consensus for the head block. (thundering herd protection) pub(super) min_sum_soft_limit: u32, + /// how far behind the highest known block height we can be before we stop serving requests + pub(super) max_block_lag: Option, + /// how old our consensus head block we can be before we stop serving requests + pub(super) max_block_age: Option, } impl Web3Rpcs { @@ -60,13 +66,15 @@ impl Web3Rpcs { chain_id: u64, db_conn: Option, http_client: Option, + max_block_age: Option, + max_block_lag: Option, min_head_rpcs: usize, min_sum_soft_limit: u32, pending_transactions: Cache, pending_tx_sender: Option>, redis_pool: Option, server_configs: HashMap, - watch_consensus_head_sender: Option>, + watch_consensus_head_sender: Option>, ) -> anyhow::Result<(Arc, AnyhowJoinHandle<()>)> { let (pending_tx_id_sender, pending_tx_id_receiver) = flume::unbounded(); let (block_sender, block_receiver) = flume::unbounded::(); @@ -212,13 +220,15 @@ impl Web3Rpcs { let connections = Arc::new(Self { conns: connections, - watch_consensus_connections_sender, + watch_consensus_rpcs_sender: watch_consensus_connections_sender, watch_consensus_head_receiver, pending_transactions, block_hashes, block_numbers, min_sum_soft_limit, min_head_rpcs, + max_block_age, + max_block_lag, }); let authorization = Arc::new(Authorization::internal(db_conn.clone())?); @@ -254,7 +264,7 @@ impl Web3Rpcs { authorization: Arc, pending_tx_id_receiver: flume::Receiver, block_receiver: flume::Receiver, - head_block_sender: Option>, + head_block_sender: Option>, pending_tx_sender: Option>, ) -> anyhow::Result<()> { let mut futures = vec![]; @@ -455,7 +465,7 @@ impl Web3Rpcs { max_block_needed: Option<&U64>, ) -> anyhow::Result { let usable_rpcs_by_head_num_and_weight: BTreeMap<(Option, u64), Vec>> = { - let synced_connections = self.watch_consensus_connections_sender.borrow().clone(); + let synced_connections = self.watch_consensus_rpcs_sender.borrow().clone(); let head_block_num = if let Some(head_block) = synced_connections.head_block.as_ref() { head_block.number() @@ -499,7 +509,7 @@ impl Web3Rpcs { match x_head_block { None => continue, Some(x_head) => { - let key = (Some(x_head.number()), u64::MAX - x.tier); + let key = (Some(*x_head.number()), u64::MAX - x.tier); m.entry(key).or_insert_with(Vec::new).push(x); } @@ -508,6 +518,7 @@ impl Web3Rpcs { } cmp::Ordering::Equal => { // need the consensus head block. filter the synced rpcs + // TODO: this doesn't properly check the allow_backups variable! for x in synced_connections .conns .iter() @@ -519,7 +530,7 @@ impl Web3Rpcs { } } cmp::Ordering::Greater => { - // TODO? 
if the blocks is close and wait_for_sync and allow_backups, wait for change on a watch_consensus_connections_receiver().subscribe() + // TODO? if the blocks is close, wait for change on a watch_consensus_connections_receiver().subscribe() return Ok(OpenRequestResult::NotReady(allow_backups)); } } @@ -670,11 +681,7 @@ impl Web3Rpcs { let mut tried = HashSet::new(); - let mut synced_conns = self - .watch_consensus_connections_sender - .borrow() - .conns - .clone(); + let mut synced_conns = self.watch_consensus_rpcs_sender.borrow().conns.clone(); // synced connections are all on the same block. sort them by tier with higher soft limits first synced_conns.sort_by_cached_key(rpc_sync_status_sort_key); @@ -754,7 +761,7 @@ impl Web3Rpcs { let mut skip_rpcs = vec![]; let mut method_not_available_response = None; - let mut watch_consensus_connections = self.watch_consensus_connections_sender.subscribe(); + let mut watch_consensus_connections = self.watch_consensus_rpcs_sender.subscribe(); // TODO: maximum retries? right now its the total number of servers loop { @@ -1144,7 +1151,7 @@ impl Serialize for Web3Rpcs { state.serialize_field("conns", &conns)?; { - let consensus_connections = self.watch_consensus_connections_sender.borrow().clone(); + let consensus_connections = self.watch_consensus_rpcs_sender.borrow().clone(); // TODO: rename synced_connections to consensus_connections? state.serialize_field("synced_connections", &consensus_connections)?; } @@ -1181,10 +1188,8 @@ mod tests { // TODO: why is this allow needed? does tokio::test get in the way somehow? #![allow(unused_imports)] use super::*; - use crate::rpcs::{ - blockchain::{ConsensusFinder, SavedBlock}, - provider::Web3Provider, - }; + use crate::rpcs::consensus::ConsensusFinder; + use crate::rpcs::{blockchain::Web3ProxyBlock, provider::Web3Provider}; use ethers::types::{Block, U256}; use log::{trace, LevelFilter}; use parking_lot::RwLock; @@ -1213,7 +1218,7 @@ mod tests { let blocks: Vec<_> = [block_0, block_1, block_2] .into_iter() - .map(|x| SavedBlock::new(Arc::new(x))) + .map(|x| Web3ProxyBlock::new(Arc::new(x))) .collect(); let mut rpcs: Vec<_> = [ @@ -1298,9 +1303,8 @@ mod tests { let lagged_block = Arc::new(lagged_block); let head_block = Arc::new(head_block); - // TODO: write a impl From for Block -> BlockId? 
- let mut lagged_block: SavedBlock = lagged_block.into(); - let mut head_block: SavedBlock = head_block.into(); + let mut lagged_block: Web3ProxyBlock = lagged_block.into(); + let mut head_block: Web3ProxyBlock = head_block.into(); let block_data_limit = u64::MAX; @@ -1312,6 +1316,7 @@ mod tests { block_data_limit: block_data_limit.into(), tier: 0, head_block: RwLock::new(Some(head_block.clone())), + provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))), ..Default::default() }; @@ -1323,6 +1328,7 @@ mod tests { block_data_limit: block_data_limit.into(), tier: 0, head_block: RwLock::new(Some(lagged_block.clone())), + provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))), ..Default::default() }; @@ -1340,13 +1346,13 @@ mod tests { (lagged_rpc.name.clone(), lagged_rpc.clone()), ]); - let (watch_consensus_connections_sender, _) = watch::channel(Default::default()); + let (watch_consensus_rpcs_sender, _) = watch::channel(Default::default()); // TODO: make a Web3Rpcs::new let conns = Web3Rpcs { conns, watch_consensus_head_receiver: None, - watch_consensus_connections_sender, + watch_consensus_rpcs_sender, pending_transactions: Cache::builder() .max_capacity(10_000) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), @@ -1356,32 +1362,37 @@ mod tests { block_numbers: Cache::builder() .max_capacity(10_000) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), + // TODO: test max_block_age? + max_block_age: None, + // TODO: test max_block_lag? + max_block_lag: None, min_head_rpcs: 1, min_sum_soft_limit: 1, }; let authorization = Arc::new(Authorization::internal(None).unwrap()); - let (head_block_sender, _head_block_receiver) = - watch::channel::(Default::default()); - let mut connection_heads = ConsensusFinder::default(); + let (head_block_sender, _head_block_receiver) = watch::channel(Default::default()); + let mut consensus_finder = ConsensusFinder::new(None, None); // process None so that conns .process_block_from_rpc( &authorization, - &mut connection_heads, + &mut consensus_finder, None, lagged_rpc.clone(), &head_block_sender, &None, ) .await - .unwrap(); + .expect( + "its lagged, but it should still be seen as consensus if its the first to report", + ); conns .process_block_from_rpc( &authorization, - &mut connection_heads, + &mut consensus_finder, None, head_rpc.clone(), &head_block_sender, @@ -1414,12 +1425,12 @@ mod tests { assert!(matches!(x, OpenRequestResult::NotReady(true))); // add lagged blocks to the conns. both servers should be allowed - lagged_block.block = conns.save_block(lagged_block.block, true).await.unwrap(); + lagged_block = conns.try_cache_block(lagged_block, true).await.unwrap(); conns .process_block_from_rpc( &authorization, - &mut connection_heads, + &mut consensus_finder, Some(lagged_block.clone()), lagged_rpc, &head_block_sender, @@ -1430,7 +1441,7 @@ mod tests { conns .process_block_from_rpc( &authorization, - &mut connection_heads, + &mut consensus_finder, Some(lagged_block.clone()), head_rpc.clone(), &head_block_sender, @@ -1442,12 +1453,12 @@ mod tests { assert_eq!(conns.num_synced_rpcs(), 2); // add head block to the conns. 
lagged_rpc should not be available - head_block.block = conns.save_block(head_block.block, true).await.unwrap(); + head_block = conns.try_cache_block(head_block, true).await.unwrap(); conns .process_block_from_rpc( &authorization, - &mut connection_heads, + &mut consensus_finder, Some(head_block.clone()), head_rpc, &head_block_sender, @@ -1511,7 +1522,7 @@ mod tests { ..Default::default() }; - let head_block: SavedBlock = Arc::new(head_block).into(); + let head_block: Web3ProxyBlock = Arc::new(head_block).into(); let pruned_rpc = Web3Rpc { name: "pruned".to_string(), @@ -1548,13 +1559,13 @@ mod tests { (archive_rpc.name.clone(), archive_rpc.clone()), ]); - let (watch_consensus_connections_sender, _) = watch::channel(Default::default()); + let (watch_consensus_rpcs_sender, _) = watch::channel(Default::default()); // TODO: make a Web3Rpcs::new let conns = Web3Rpcs { conns, watch_consensus_head_receiver: None, - watch_consensus_connections_sender, + watch_consensus_rpcs_sender, pending_transactions: Cache::builder() .max_capacity(10) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), @@ -1566,13 +1577,14 @@ mod tests { .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), min_head_rpcs: 1, min_sum_soft_limit: 3_000, + max_block_age: None, + max_block_lag: None, }; let authorization = Arc::new(Authorization::internal(None).unwrap()); - let (head_block_sender, _head_block_receiver) = - watch::channel::(Default::default()); - let mut connection_heads = ConsensusFinder::default(); + let (head_block_sender, _head_block_receiver) = watch::channel(Default::default()); + let mut connection_heads = ConsensusFinder::new(None, None); conns .process_block_from_rpc( diff --git a/web3_proxy/src/rpcs/mod.rs b/web3_proxy/src/rpcs/mod.rs index 44ea5afe..41b7a6ea 100644 --- a/web3_proxy/src/rpcs/mod.rs +++ b/web3_proxy/src/rpcs/mod.rs @@ -1,8 +1,8 @@ // TODO: all pub, or export useful things here instead? pub mod blockchain; +pub mod consensus; pub mod many; pub mod one; pub mod provider; pub mod request; -pub mod synced_connections; pub mod transactions; diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index dfa08a4f..8b4decc4 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -1,5 +1,5 @@ ///! Rate-limited communication with a web3 provider. -use super::blockchain::{ArcBlock, BlockHashesCache, SavedBlock}; +use super::blockchain::{ArcBlock, BlockHashesCache, Web3ProxyBlock}; use super::provider::Web3Provider; use super::request::{OpenRequestHandle, OpenRequestResult}; use crate::app::{flatten_handle, AnyhowJoinHandle}; @@ -81,7 +81,7 @@ pub struct Web3Rpc { /// Lower tiers are higher priority when sending requests pub(super) tier: u64, /// TODO: change this to a watch channel so that http providers can subscribe and take action on change. - pub(super) head_block: RwLock>, + pub(super) head_block: RwLock>, /// Track how fast this RPC is pub(super) latency: Web3RpcLatencies, } @@ -308,9 +308,9 @@ impl Web3Rpc { } pub fn has_block_data(&self, needed_block_num: &U64) -> bool { - let head_block_num = match self.head_block.read().clone() { + let head_block_num = match self.head_block.read().as_ref() { None => return false, - Some(x) => x.number(), + Some(x) => *x.number(), }; // this rpc doesn't have that block yet. 
still syncing @@ -525,9 +525,9 @@ impl Web3Rpc { None } Ok(Some(new_head_block)) => { - let new_hash = new_head_block - .hash - .context("sending block to connections")?; + let new_head_block = Web3ProxyBlock::new(new_head_block); + + let new_hash = *new_head_block.hash(); // if we already have this block saved, set new_head_block to that arc. otherwise store this copy let new_head_block = block_map @@ -628,6 +628,7 @@ impl Web3Rpc { if let Some(client) = &*conn.provider.read().await { // trace!("health check unlocked with error on {}", conn); // returning error will trigger a reconnect + // also, do the health check as a way of keeping this rpc's request_ewma accurate // TODO: do a query of some kind } @@ -1164,7 +1165,7 @@ mod tests { let random_block = Arc::new(random_block); - let head_block = SavedBlock::new(random_block); + let head_block = Web3ProxyBlock::new(random_block); let block_data_limit = u64::MAX; let x = Web3Rpc { @@ -1194,7 +1195,7 @@ mod tests { .as_secs() .into(); - let head_block: SavedBlock = Arc::new(Block { + let head_block: Web3ProxyBlock = Arc::new(Block { hash: Some(H256::random()), number: Some(1_000_000.into()), timestamp: now, diff --git a/web3_proxy/src/rpcs/synced_connections.rs b/web3_proxy/src/rpcs/synced_connections.rs deleted file mode 100644 index e285c307..00000000 --- a/web3_proxy/src/rpcs/synced_connections.rs +++ /dev/null @@ -1,71 +0,0 @@ -use super::blockchain::{ArcBlock, SavedBlock}; -use super::many::Web3Rpcs; -use super::one::Web3Rpc; -use ethers::prelude::{H256, U64}; -use serde::Serialize; -use std::fmt; -use std::sync::Arc; - -/// A collection of Web3Rpcs that are on the same block. -/// Serialize is so we can print it on our debug endpoint -#[derive(Clone, Default, Serialize)] -pub struct ConsensusWeb3Rpcs { - // TODO: store ArcBlock instead? - pub(super) head_block: Option, - // TODO: this should be able to serialize, but it isn't - #[serde(skip_serializing)] - pub(super) conns: Vec>, - pub(super) num_checked_conns: usize, - pub(super) includes_backups: bool, -} - -impl ConsensusWeb3Rpcs { - pub fn num_conns(&self) -> usize { - self.conns.len() - } - - pub fn sum_soft_limit(&self) -> u32 { - self.conns.iter().fold(0, |sum, rpc| sum + rpc.soft_limit) - } - - // TODO: sum_hard_limit? -} - -impl fmt::Debug for ConsensusWeb3Rpcs { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // TODO: the default formatter takes forever to write. this is too quiet though - // TODO: print the actual conns? - f.debug_struct("ConsensusConnections") - .field("head_block", &self.head_block) - .field("num_conns", &self.conns.len()) - .finish_non_exhaustive() - } -} - -impl Web3Rpcs { - pub fn head_block(&self) -> Option { - self.watch_consensus_head_receiver - .as_ref() - .map(|x| x.borrow().clone()) - } - - pub fn head_block_hash(&self) -> Option { - self.head_block().and_then(|x| x.hash) - } - - pub fn head_block_num(&self) -> Option { - self.head_block().and_then(|x| x.number) - } - - pub fn synced(&self) -> bool { - !self - .watch_consensus_connections_sender - .borrow() - .conns - .is_empty() - } - - pub fn num_synced_rpcs(&self) -> usize { - self.watch_consensus_connections_sender.borrow().conns.len() - } -} From f1e6de36775259c4fc430fd176fad46201e5648e Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Tue, 14 Feb 2023 12:41:05 -0800 Subject: [PATCH 23/47] fixed one test. 
still more to fix though --- web3_proxy/src/rpcs/blockchain.rs | 12 ++++++++++- web3_proxy/src/rpcs/consensus.rs | 36 +++++++++++++++++++++++-------- web3_proxy/src/rpcs/many.rs | 4 ++-- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/web3_proxy/src/rpcs/blockchain.rs b/web3_proxy/src/rpcs/blockchain.rs index b6bfd01e..8ce7f495 100644 --- a/web3_proxy/src/rpcs/blockchain.rs +++ b/web3_proxy/src/rpcs/blockchain.rs @@ -8,6 +8,7 @@ use crate::{config::BlockAndRpc, jsonrpc::JsonRpcRequest}; use anyhow::Context; use derive_more::From; use ethers::prelude::{Block, TxHash, H256, U64}; +use hashbrown::HashSet; use log::{debug, error, trace, warn, Level}; use moka::future::Cache; use serde::Serialize; @@ -315,7 +316,16 @@ impl Web3Rpcs { ) -> anyhow::Result<()> { // TODO: indexmap or hashmap? what hasher? with_capacity? // TODO: this will grow unbounded. prune old heads on this at the same time we prune the graph? - let mut connection_heads = ConsensusFinder::new(self.max_block_age, self.max_block_lag); + let configured_tiers: Vec = self + .conns + .values() + .map(|x| x.tier) + .collect::>() + .into_iter() + .collect(); + + let mut connection_heads = + ConsensusFinder::new(&configured_tiers, self.max_block_age, self.max_block_lag); loop { match block_receiver.recv_async().await { diff --git a/web3_proxy/src/rpcs/consensus.rs b/web3_proxy/src/rpcs/consensus.rs index 289be536..bcaf1f56 100644 --- a/web3_proxy/src/rpcs/consensus.rs +++ b/web3_proxy/src/rpcs/consensus.rs @@ -3,6 +3,7 @@ use crate::frontend::authorization::Authorization; use super::blockchain::Web3ProxyBlock; use super::many::Web3Rpcs; use super::one::Web3Rpc; +use anyhow::Context; use ethers::prelude::{H256, U64}; use hashbrown::{HashMap, HashSet}; use log::{debug, trace, warn}; @@ -371,9 +372,19 @@ pub struct ConsensusFinder { } impl ConsensusFinder { - pub fn new(max_block_age: Option, max_block_lag: Option) -> Self { + pub fn new( + configured_tiers: &[u64], + max_block_age: Option, + max_block_lag: Option, + ) -> Self { + // TODO: this will need some thought when config reloading is written + let tiers = configured_tiers + .iter() + .map(|x| (*x, Default::default())) + .collect(); + Self { - tiers: Default::default(), + tiers, max_block_age, max_block_lag, } @@ -413,8 +424,10 @@ impl ConsensusFinder { pub fn insert(&mut self, rpc: &Web3Rpc, new_block: Web3ProxyBlock) -> Option { let mut old = None; + // TODO: error if rpc.tier is not in self.tiers + for (i, tier_group) in self.tiers.iter_mut().rev() { - if i > &rpc.tier { + if i < &rpc.tier { break; } @@ -443,7 +456,8 @@ impl ConsensusFinder { // we don't know if its on the heaviest chain yet rpc_head_block = web3_connections .try_cache_block(rpc_head_block, false) - .await?; + .await + .context("failed caching block")?; // if let Some(max_block_lag) = max_block_lag { // if rpc_head_block.number() < ??? { @@ -486,14 +500,18 @@ impl ConsensusFinder { Ok(changed) } - // TODO: this could definitely be cleaner. i don't like the error handling/unwrapping pub async fn best_consensus_connections( &mut self, authorization: &Arc, web3_connections: &Web3Rpcs, - ) -> Option { + ) -> anyhow::Result { // TODO: attach context to these? - let highest_known_block = self.all_rpcs_group()?.highest_block.as_ref()?; + let highest_known_block = self + .all_rpcs_group() + .context("no rpcs")? 
+ .highest_block + .as_ref() + .context("no highest block")?; trace!("highest_known_block: {}", highest_known_block); @@ -518,10 +536,10 @@ impl ConsensusFinder { trace!("success on tier {}", i); // we got one! hopefully it didn't need to use any backups. // but even if it did need backup servers, that is better than going to a worse tier - return Some(consensus_head_connections); + return Ok(consensus_head_connections); } } - return None; + return Err(anyhow::anyhow!("failed finding consensus on all tiers")); } } diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index e5293d2d..2a3bd24a 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -1373,7 +1373,7 @@ mod tests { let authorization = Arc::new(Authorization::internal(None).unwrap()); let (head_block_sender, _head_block_receiver) = watch::channel(Default::default()); - let mut consensus_finder = ConsensusFinder::new(None, None); + let mut consensus_finder = ConsensusFinder::new(&[0, 1, 2, 3], None, None); // process None so that conns @@ -1584,7 +1584,7 @@ mod tests { let authorization = Arc::new(Authorization::internal(None).unwrap()); let (head_block_sender, _head_block_receiver) = watch::channel(Default::default()); - let mut connection_heads = ConsensusFinder::new(None, None); + let mut connection_heads = ConsensusFinder::new(&[0, 1, 2, 3], None, None); conns .process_block_from_rpc( From c65ffc9ce0c2c1c7ceebe57455c97d56078d5720 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Tue, 14 Feb 2023 17:41:40 -0800 Subject: [PATCH 24/47] getting close --- web3_proxy/src/app/mod.rs | 45 +- web3_proxy/src/app/ws.rs | 6 + web3_proxy/src/bin/web3_proxy_cli/daemon.rs | 2 +- web3_proxy/src/rpcs/blockchain.rs | 94 ++-- web3_proxy/src/rpcs/consensus.rs | 32 +- web3_proxy/src/rpcs/many.rs | 476 ++++++++++---------- web3_proxy/src/rpcs/one.rs | 14 +- web3_proxy/src/rpcs/request.rs | 5 +- 8 files changed, 352 insertions(+), 322 deletions(-) diff --git a/web3_proxy/src/app/mod.rs b/web3_proxy/src/app/mod.rs index 68ef6bb4..bcb54937 100644 --- a/web3_proxy/src/app/mod.rs +++ b/web3_proxy/src/app/mod.rs @@ -204,7 +204,7 @@ pub struct Web3ProxyApp { response_cache: ResponseCache, // don't drop this or the sender will stop working // TODO: broadcast channel instead? - watch_consensus_head_receiver: watch::Receiver, + watch_consensus_head_receiver: watch::Receiver>, pending_tx_sender: broadcast::Sender, pub config: AppConfig, pub db_conn: Option, @@ -541,8 +541,7 @@ impl Web3ProxyApp { }; // TODO: i don't like doing Block::default here! Change this to "None"? - let (watch_consensus_head_sender, watch_consensus_head_receiver) = - watch::channel(Web3ProxyBlock::default()); + let (watch_consensus_head_sender, watch_consensus_head_receiver) = watch::channel(None); // TODO: will one receiver lagging be okay? how big should this be? 
let (pending_tx_sender, pending_tx_receiver) = broadcast::channel(256); @@ -624,7 +623,7 @@ impl Web3ProxyApp { .await .context("spawning private_rpcs")?; - if private_rpcs.conns.is_empty() { + if private_rpcs.by_name.is_empty() { None } else { // save the handle to catch any errors @@ -740,7 +739,7 @@ impl Web3ProxyApp { Ok((app, cancellable_handles, important_background_handles).into()) } - pub fn head_block_receiver(&self) -> watch::Receiver { + pub fn head_block_receiver(&self) -> watch::Receiver> { self.watch_consensus_head_receiver.clone() } @@ -938,7 +937,7 @@ impl Web3ProxyApp { JsonRpcRequestEnum::Single(request) => { let (response, rpcs) = timeout( max_time, - self.proxy_cached_request(&authorization, request, proxy_mode), + self.proxy_cached_request(&authorization, request, proxy_mode, None), ) .await??; @@ -971,10 +970,26 @@ impl Web3ProxyApp { // TODO: spawn so the requests go in parallel? need to think about rate limiting more if we do that // TODO: improve flattening + + // get the head block now so that any requests that need it all use the same block + // TODO: FrontendErrorResponse that handles "no servers synced" in a consistent way + // TODO: this still has an edge condition if there is a reorg in the middle of the request!!! + let head_block_num = self + .balanced_rpcs + .head_block_num() + .context(anyhow::anyhow!("no servers synced"))?; + let responses = join_all( requests .into_iter() - .map(|request| self.proxy_cached_request(authorization, request, proxy_mode)) + .map(|request| { + self.proxy_cached_request( + authorization, + request, + proxy_mode, + Some(head_block_num), + ) + }) .collect::>(), ) .await; @@ -1023,6 +1038,7 @@ impl Web3ProxyApp { authorization: &Arc, mut request: JsonRpcRequest, proxy_mode: ProxyMode, + head_block_num: Option, ) -> Result<(JsonRpcForwardedResponse, Vec>), FrontendErrorResponse> { // trace!("Received request: {:?}", request); @@ -1139,7 +1155,7 @@ impl Web3ProxyApp { serde_json::Value::Array(vec![]) } "eth_blockNumber" => { - match self.balanced_rpcs.head_block_num() { + match head_block_num.or(self.balanced_rpcs.head_block_num()) { Some(head_block_num) => { json!(head_block_num) } @@ -1237,7 +1253,11 @@ impl Web3ProxyApp { (&self.balanced_rpcs, default_num) }; - let head_block_num = self.balanced_rpcs.head_block_num(); + let head_block_num = head_block_num + .or(self.balanced_rpcs.head_block_num()) + .ok_or_else(|| anyhow::anyhow!("no servers synced"))?; + + // TODO: error/wait if no head block! // try_send_all_upstream_servers puts the request id into the response. no need to do that ourselves here. let mut response = private_rpcs @@ -1245,7 +1265,7 @@ impl Web3ProxyApp { authorization, &request, Some(request_metadata.clone()), - head_block_num.as_ref(), + Some(&head_block_num), None, Level::Trace, num, @@ -1440,9 +1460,8 @@ impl Web3ProxyApp { // emit stats // TODO: if no servers synced, wait for them to be synced? 
probably better to error and let haproxy retry another server - let head_block_num = self - .balanced_rpcs - .head_block_num() + let head_block_num = head_block_num + .or(self.balanced_rpcs.head_block_num()) .context("no servers synced")?; // we do this check before checking caches because it might modify the request params diff --git a/web3_proxy/src/app/ws.rs b/web3_proxy/src/app/ws.rs index e61db2c2..b125a5fa 100644 --- a/web3_proxy/src/app/ws.rs +++ b/web3_proxy/src/app/ws.rs @@ -61,6 +61,12 @@ impl Web3ProxyApp { ); while let Some(new_head) = head_block_receiver.next().await { + let new_head = if let Some(new_head) = new_head { + new_head + } else { + continue; + }; + // TODO: what should the payload for RequestMetadata be? let request_metadata = Arc::new(RequestMetadata::new(REQUEST_PERIOD, 0).unwrap()); diff --git a/web3_proxy/src/bin/web3_proxy_cli/daemon.rs b/web3_proxy/src/bin/web3_proxy_cli/daemon.rs index cf2f4cf8..465e545e 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/daemon.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/daemon.rs @@ -64,7 +64,7 @@ async fn run( )); // wait until the app has seen its first consensus head block - // TODO: if backups were included, wait a little longer + // TODO: if backups were included, wait a little longer? let _ = spawned_app.app.head_block_receiver().changed().await; // start the frontend port diff --git a/web3_proxy/src/rpcs/blockchain.rs b/web3_proxy/src/rpcs/blockchain.rs index 8ce7f495..9aa018a0 100644 --- a/web3_proxy/src/rpcs/blockchain.rs +++ b/web3_proxy/src/rpcs/blockchain.rs @@ -5,7 +5,7 @@ use super::one::Web3Rpc; use super::transactions::TxStatus; use crate::frontend::authorization::Authorization; use crate::{config::BlockAndRpc, jsonrpc::JsonRpcRequest}; -use anyhow::Context; +use anyhow::{anyhow, Context}; use derive_more::From; use ethers::prelude::{Block, TxHash, H256, U64}; use hashbrown::HashSet; @@ -45,7 +45,11 @@ impl PartialEq for Web3ProxyBlock { impl Web3ProxyBlock { /// A new block has arrived over a subscription - pub fn new(block: ArcBlock) -> Self { + pub fn try_new(block: ArcBlock) -> Option { + if block.number.is_none() || block.hash.is_none() { + return None; + } + let mut x = Self { block, received_age: None, @@ -56,7 +60,7 @@ impl Web3ProxyBlock { // TODO: emit a stat for received_age x.received_age = Some(x.age()); - x + Some(x) } pub fn age(&self) -> u64 { @@ -97,12 +101,20 @@ impl Web3ProxyBlock { } } -impl From for Web3ProxyBlock { - fn from(x: ArcBlock) -> Self { - Web3ProxyBlock { +impl TryFrom for Web3ProxyBlock { + type Error = anyhow::Error; + + fn try_from(x: ArcBlock) -> Result { + if x.number.is_none() || x.hash.is_none() { + return Err(anyhow!("Blocks here must have a number of hash")); + } + + let b = Web3ProxyBlock { block: x, received_age: None, - } + }; + + Ok(b) } } @@ -184,7 +196,13 @@ impl Web3Rpcs { None, ) .await? - .map(Into::into) + .and_then(|x| { + if x.number.is_none() { + None + } else { + x.try_into().ok() + } + }) .context("no block!")?, None => { // TODO: helper for method+params => JsonRpcRequest @@ -208,8 +226,10 @@ impl Web3Rpcs { let block: Option = serde_json::from_str(block.get())?; - // TODO: from isn't great here. received time is going to be weird - block.map(Into::into).context("no block!")? + let block: ArcBlock = block.context("no block in the response")?; + + // TODO: received time is going to be weird + Web3ProxyBlock::try_from(block)? } }; @@ -252,7 +272,11 @@ impl Web3Rpcs { // be sure the requested block num exists // TODO: is this okay? what if we aren't synced?! 
- let mut head_block_num = *consensus_head_receiver.borrow_and_update().number(); + let mut head_block_num = *consensus_head_receiver + .borrow_and_update() + .as_ref() + .context("no consensus head block")? + .number(); loop { if num <= &head_block_num { @@ -262,7 +286,9 @@ impl Web3Rpcs { trace!("waiting for future block {} > {}", num, head_block_num); consensus_head_receiver.changed().await?; - head_block_num = *consensus_head_receiver.borrow_and_update().number(); + if let Some(head) = consensus_head_receiver.borrow_and_update().as_ref() { + head_block_num = *head.number(); + } } let block_depth = (head_block_num - num).as_u64(); @@ -297,7 +323,7 @@ impl Web3Rpcs { let block: ArcBlock = serde_json::from_str(raw_block.get())?; - let block = Web3ProxyBlock::from(block); + let block = Web3ProxyBlock::try_from(block)?; // the block was fetched using eth_getBlockByNumber, so it should have all fields and be on the heaviest chain let block = self.try_cache_block(block, true).await?; @@ -311,13 +337,13 @@ impl Web3Rpcs { block_receiver: flume::Receiver, // TODO: document that this is a watch sender and not a broadcast! if things get busy, blocks might get missed // Geth's subscriptions have the same potential for skipping blocks. - head_block_sender: watch::Sender, + head_block_sender: watch::Sender>, pending_tx_sender: Option>, ) -> anyhow::Result<()> { // TODO: indexmap or hashmap? what hasher? with_capacity? // TODO: this will grow unbounded. prune old heads on this at the same time we prune the graph? let configured_tiers: Vec = self - .conns + .by_name .values() .map(|x| x.tier) .collect::>() @@ -363,7 +389,7 @@ impl Web3Rpcs { consensus_finder: &mut ConsensusFinder, rpc_head_block: Option, rpc: Arc, - head_block_sender: &watch::Sender, + head_block_sender: &watch::Sender>, pending_tx_sender: &Option>, ) -> anyhow::Result<()> { // TODO: how should we handle an error here? @@ -392,12 +418,11 @@ impl Web3Rpcs { let backups_needed = new_synced_connections.backups_needed; let consensus_head_block = new_synced_connections.head_block.clone(); let num_consensus_rpcs = new_synced_connections.num_conns(); - let num_checked_rpcs = 0; // TODO: figure this out let num_active_rpcs = consensus_finder .all_rpcs_group() .map(|x| x.len()) .unwrap_or_default(); - let total_rpcs = self.conns.len(); + let total_rpcs = self.by_name.len(); let old_consensus_head_connections = self .watch_consensus_rpcs_sender @@ -409,10 +434,9 @@ impl Web3Rpcs { match &old_consensus_head_connections.head_block { None => { debug!( - "first {}{}/{}/{}/{} block={}, rpc={}", + "first {}{}/{}/{} block={}, rpc={}", backups_voted_str, num_consensus_rpcs, - num_checked_rpcs, num_active_rpcs, total_rpcs, consensus_head_block, @@ -429,7 +453,7 @@ impl Web3Rpcs { self.try_cache_block(consensus_head_block, true).await?; head_block_sender - .send(consensus_head_block) + .send(Some(consensus_head_block)) .context("head_block_sender sending consensus_head_block")?; } Some(old_head_block) => { @@ -445,10 +469,9 @@ impl Web3Rpcs { // no change in hash. 
no need to use head_block_sender // TODO: trace level if rpc is backup debug!( - "con {}{}/{}/{}/{} con={} rpc={}@{}", + "con {}{}/{}/{} con={} rpc={}@{}", backups_voted_str, num_consensus_rpcs, - num_checked_rpcs, num_active_rpcs, total_rpcs, consensus_head_block, @@ -463,10 +486,9 @@ impl Web3Rpcs { } debug!( - "unc {}{}/{}/{}/{} con_head={} old={} rpc={}@{}", + "unc {}{}/{}/{} con_head={} old={} rpc={}@{}", backups_voted_str, num_consensus_rpcs, - num_checked_rpcs, num_active_rpcs, total_rpcs, consensus_head_block, @@ -481,7 +503,7 @@ impl Web3Rpcs { .context("save consensus_head_block as heaviest chain")?; head_block_sender - .send(consensus_head_block) + .send(Some(consensus_head_block)) .context("head_block_sender sending consensus_head_block")?; } } @@ -489,10 +511,9 @@ impl Web3Rpcs { // this is unlikely but possible // TODO: better log warn!( - "chain rolled back {}{}/{}/{}/{} con={} old={} rpc={}@{}", + "chain rolled back {}{}/{}/{} con={} old={} rpc={}@{}", backups_voted_str, num_consensus_rpcs, - num_checked_rpcs, num_active_rpcs, total_rpcs, consensus_head_block, @@ -515,15 +536,14 @@ impl Web3Rpcs { )?; head_block_sender - .send(consensus_head_block) + .send(Some(consensus_head_block)) .context("head_block_sender sending consensus_head_block")?; } Ordering::Greater => { debug!( - "new {}{}/{}/{}/{} con={} rpc={}@{}", + "new {}{}/{}/{} con={} rpc={}@{}", backups_voted_str, num_consensus_rpcs, - num_checked_rpcs, num_active_rpcs, total_rpcs, consensus_head_block, @@ -539,7 +559,7 @@ impl Web3Rpcs { let consensus_head_block = self.try_cache_block(consensus_head_block, true).await?; - head_block_sender.send(consensus_head_block)?; + head_block_sender.send(Some(consensus_head_block))?; } } } @@ -550,23 +570,23 @@ impl Web3Rpcs { .map(|x| x.to_string()) .unwrap_or_else(|| "None".to_string()); - if num_checked_rpcs >= self.min_head_rpcs { + if num_active_rpcs >= self.min_head_rpcs { + // no consensus!!! error!( - "non {}{}/{}/{}/{} rpc={}@{}", + "non {}{}/{}/{} rpc={}@{}", backups_voted_str, num_consensus_rpcs, - num_checked_rpcs, num_active_rpcs, total_rpcs, rpc, rpc_head_str, ); } else { + // no consensus, but we do not have enough rpcs connected yet to panic debug!( - "non {}{}/{}/{}/{} rpc={}@{}", + "non {}{}/{}/{} rpc={}@{}", backups_voted_str, num_consensus_rpcs, - num_checked_rpcs, num_active_rpcs, total_rpcs, rpc, diff --git a/web3_proxy/src/rpcs/consensus.rs b/web3_proxy/src/rpcs/consensus.rs index bcaf1f56..847892cf 100644 --- a/web3_proxy/src/rpcs/consensus.rs +++ b/web3_proxy/src/rpcs/consensus.rs @@ -19,18 +19,18 @@ pub struct ConsensusWeb3Rpcs { pub(super) head_block: Option, // TODO: this should be able to serialize, but it isn't #[serde(skip_serializing)] - pub(super) conns: Vec>, + pub(super) rpcs: Vec>, pub(super) backups_voted: Option, pub(super) backups_needed: bool, } impl ConsensusWeb3Rpcs { pub fn num_conns(&self) -> usize { - self.conns.len() + self.rpcs.len() } pub fn sum_soft_limit(&self) -> u32 { - self.conns.iter().fold(0, |sum, rpc| sum + rpc.soft_limit) + self.rpcs.iter().fold(0, |sum, rpc| sum + rpc.soft_limit) } // TODO: sum_hard_limit? @@ -42,7 +42,7 @@ impl fmt::Debug for ConsensusWeb3Rpcs { // TODO: print the actual conns? 
f.debug_struct("ConsensusConnections") .field("head_block", &self.head_block) - .field("num_conns", &self.conns.len()) + .field("num_conns", &self.rpcs.len()) .finish_non_exhaustive() } } @@ -52,7 +52,7 @@ impl Web3Rpcs { pub fn head_block(&self) -> Option { self.watch_consensus_head_receiver .as_ref() - .map(|x| x.borrow().clone()) + .and_then(|x| x.borrow().clone()) } // TODO: return a ref? @@ -66,11 +66,11 @@ impl Web3Rpcs { } pub fn synced(&self) -> bool { - !self.watch_consensus_rpcs_sender.borrow().conns.is_empty() + !self.watch_consensus_rpcs_sender.borrow().rpcs.is_empty() } pub fn num_synced_rpcs(&self) -> usize { - self.watch_consensus_rpcs_sender.borrow().conns.len() + self.watch_consensus_rpcs_sender.borrow().rpcs.len() } } @@ -243,7 +243,7 @@ impl ConnectionsGroup { continue; } - if let Some(rpc) = web3_rpcs.conns.get(rpc_name.as_str()) { + if let Some(rpc) = web3_rpcs.by_name.get(rpc_name.as_str()) { if backup_rpcs_voted.is_some() { // backups already voted for a head block. don't change it } else { @@ -257,7 +257,7 @@ impl ConnectionsGroup { } else { // i don't think this is an error. i think its just if a reconnect is currently happening warn!("connection missing: {}", rpc_name); - debug!("web3_rpcs.conns: {:#?}", web3_rpcs.conns); + debug!("web3_rpcs.conns: {:#?}", web3_rpcs.by_name); } } @@ -340,7 +340,7 @@ impl ConnectionsGroup { // success! this block has enough soft limit and nodes on it (or on later blocks) let conns: Vec> = primary_consensus_rpcs .into_iter() - .filter_map(|conn_name| web3_rpcs.conns.get(conn_name).cloned()) + .filter_map(|conn_name| web3_rpcs.by_name.get(conn_name).cloned()) .collect(); #[cfg(debug_assertions)] @@ -350,7 +350,7 @@ impl ConnectionsGroup { Ok(ConsensusWeb3Rpcs { head_block: Some(maybe_head_block), - conns, + rpcs: conns, backups_voted: backup_rpcs_voted, backups_needed: primary_rpcs_voted.is_none(), }) @@ -528,7 +528,7 @@ impl ConsensusFinder { // TODO: find the best tier with a connectionsgroup. best case, this only queries the first tier // TODO: do we need to calculate all of them? 
I think having highest_known_block included as part of min_block_num should make that unnecessary for (i, x) in self.tiers.iter() { - trace!("checking tier {}", i); + trace!("checking tier {}: {:#?}", i, x.rpc_name_to_block); if let Ok(consensus_head_connections) = x .consensus_head_connections(authorization, web3_connections, min_block_num) .await @@ -543,3 +543,11 @@ impl ConsensusFinder { return Err(anyhow::anyhow!("failed finding consensus on all tiers")); } } + +#[cfg(test)] +mod test { + #[test] + fn test_simplest_case_consensus_head_connections() { + todo!(); + } +} diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index 2a3bd24a..c449b241 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -16,6 +16,7 @@ use futures::future::try_join_all; use futures::stream::FuturesUnordered; use futures::StreamExt; use hashbrown::{HashMap, HashSet}; +use itertools::Itertools; use log::{debug, error, info, trace, warn, Level}; use migration::sea_orm::DatabaseConnection; use moka::future::{Cache, ConcurrentCacheExt}; @@ -23,6 +24,7 @@ use serde::ser::{SerializeStruct, Serializer}; use serde::Serialize; use serde_json::json; use serde_json::value::RawValue; +use std::cmp::min_by_key; use std::collections::BTreeMap; use std::sync::atomic::Ordering; use std::sync::Arc; @@ -36,11 +38,11 @@ use tokio::time::{interval, sleep, sleep_until, Duration, Instant, MissedTickBeh #[derive(From)] pub struct Web3Rpcs { /// any requests will be forwarded to one (or more) of these connections - pub(crate) conns: HashMap>, + pub(crate) by_name: HashMap>, /// all providers with the same consensus head block. won't update if there is no `self.watch_consensus_head_sender` pub(super) watch_consensus_rpcs_sender: watch::Sender>, /// this head receiver makes it easy to wait until there is a new block - pub(super) watch_consensus_head_receiver: Option>, + pub(super) watch_consensus_head_receiver: Option>>, pub(super) pending_transactions: Cache, /// TODO: this map is going to grow forever unless we do some sort of pruning. maybe store pruned in redis? @@ -74,7 +76,7 @@ impl Web3Rpcs { pending_tx_sender: Option>, redis_pool: Option, server_configs: HashMap, - watch_consensus_head_sender: Option>, + watch_consensus_head_sender: Option>>, ) -> anyhow::Result<(Arc, AnyhowJoinHandle<()>)> { let (pending_tx_id_sender, pending_tx_id_receiver) = flume::unbounded(); let (block_sender, block_receiver) = flume::unbounded::(); @@ -219,7 +221,7 @@ impl Web3Rpcs { watch_consensus_head_sender.as_ref().map(|x| x.subscribe()); let connections = Arc::new(Self { - conns: connections, + by_name: connections, watch_consensus_rpcs_sender: watch_consensus_connections_sender, watch_consensus_head_receiver, pending_transactions, @@ -253,7 +255,7 @@ impl Web3Rpcs { } pub fn get(&self, conn_name: &str) -> Option<&Arc> { - self.conns.get(conn_name) + self.by_name.get(conn_name) } /// subscribe to blocks and transactions from all the backend rpcs. 
@@ -264,7 +266,7 @@ impl Web3Rpcs { authorization: Arc, pending_tx_id_receiver: flume::Receiver, block_receiver: flume::Receiver, - head_block_sender: Option>, + head_block_sender: Option>>, pending_tx_sender: Option>, ) -> anyhow::Result<()> { let mut futures = vec![]; @@ -426,70 +428,64 @@ impl Web3Rpcs { min_block_needed: Option<&U64>, max_block_needed: Option<&U64>, ) -> anyhow::Result { - if let Ok(without_backups) = self - ._best_consensus_head_connection( - false, - authorization, - request_metadata, - skip, - min_block_needed, - max_block_needed, - ) - .await - { - // TODO: this might use backups too eagerly. but even when we allow backups, we still prioritize our own - if matches!(without_backups, OpenRequestResult::Handle(_)) { - return Ok(without_backups); - } - } - - self._best_consensus_head_connection( - true, - authorization, - request_metadata, - skip, - min_block_needed, - max_block_needed, - ) - .await - } - - /// get the best available rpc server with the consensus head block. it might have blocks after the consensus head - async fn _best_consensus_head_connection( - &self, - allow_backups: bool, - authorization: &Arc, - request_metadata: Option<&Arc>, - skip: &[Arc], - min_block_needed: Option<&U64>, - max_block_needed: Option<&U64>, - ) -> anyhow::Result { - let usable_rpcs_by_head_num_and_weight: BTreeMap<(Option, u64), Vec>> = { + let usable_rpcs_by_tier_and_head_number: BTreeMap<(u64, Option), Vec>> = { let synced_connections = self.watch_consensus_rpcs_sender.borrow().clone(); - let head_block_num = if let Some(head_block) = synced_connections.head_block.as_ref() { - head_block.number() - } else { - // TODO: optionally wait for a head block >= min_block_needed - return Ok(OpenRequestResult::NotReady(allow_backups)); + let (head_block_num, head_block_age) = + if let Some(head_block) = synced_connections.head_block.as_ref() { + (head_block.number(), head_block.age()) + } else { + // TODO: optionally wait for a head_block.number() >= min_block_needed + // TODO: though i think that wait would actually need to be earlier in the request + return Ok(OpenRequestResult::NotReady); + }; + + let needed_blocks_comparison = match (min_block_needed, max_block_needed) { + (None, None) => { + // no required block given. treat this like the requested the consensus head block + cmp::Ordering::Equal + } + (None, Some(max_block_needed)) => max_block_needed.cmp(head_block_num), + (Some(min_block_needed), None) => min_block_needed.cmp(head_block_num), + (Some(min_block_needed), Some(max_block_needed)) => { + match min_block_needed.cmp(max_block_needed) { + cmp::Ordering::Equal => min_block_needed.cmp(head_block_num), + cmp::Ordering::Greater => { + return Err(anyhow::anyhow!( + "Invalid blocks bounds requested. min ({}) > max ({})", + min_block_needed, + max_block_needed + )) + } + cmp::Ordering::Less => { + // hmmmm + todo!("now what do we do?"); + } + } + } }; - let min_block_needed = min_block_needed.unwrap_or(&head_block_num); - + // collect "usable_rpcs_by_head_num_and_weight" + // TODO: MAKE SURE None SORTS LAST? let mut m = BTreeMap::new(); - match min_block_needed.cmp(&head_block_num) { + match needed_blocks_comparison { cmp::Ordering::Less => { - // need an old block. check all the rpcs. prefer the most synced + // need an old block. check all the rpcs. 
ignore rpcs that are still syncing + + let min_block_age = + self.max_block_age.map(|x| head_block_age.saturating_sub(x)); + let min_sync_num = self.max_block_lag.map(|x| head_block_num.saturating_sub(x)); + + // TODO: cache this somehow? + // TODO: maybe have a helper on synced_connections? that way sum_soft_limits/min_synced_rpcs will be DRY for x in self - .conns + .by_name .values() .filter(|x| { - if !allow_backups && x.backup { - false - } else if skip.contains(x) { - false - } else if !x.has_block_data(min_block_needed) { + // TODO: move a bunch of this onto a rpc.is_synced function + if skip.contains(x) { + // we've already tried this server or have some other reason to skip it false } else if max_block_needed .and_then(|max_block_needed| { @@ -497,8 +493,18 @@ impl Web3Rpcs { }) .unwrap_or(false) { + // server does not have the max block + false + } else if min_block_needed + .and_then(|min_block_needed| { + Some(!x.has_block_data(min_block_needed)) + }) + .unwrap_or(false) + { + // server does not have the min block false } else { + // server has the block we need! true } }) @@ -506,32 +512,43 @@ impl Web3Rpcs { { let x_head_block = x.head_block.read().clone(); - match x_head_block { - None => continue, - Some(x_head) => { - let key = (Some(*x_head.number()), u64::MAX - x.tier); + if let Some(x_head) = x_head_block { + // TODO: should nodes that are ahead of the consensus block have priority? seems better to spread the load + let x_head_num = x_head.number().min(head_block_num); - m.entry(key).or_insert_with(Vec::new).push(x); + // TODO: do we really need to check head_num and age? + if let Some(min_sync_num) = min_sync_num.as_ref() { + if x_head_num < min_sync_num { + continue; + } } + if let Some(min_block_age) = min_block_age { + if x_head.age() < min_block_age { + // rpc is still syncing + continue; + } + } + + let key = (x.tier, Some(*x_head_num)); + + m.entry(key).or_insert_with(Vec::new).push(x); } } + + // TODO: check min_synced_rpcs and min_sum_soft_limits? or maybe better to just try to serve the request? } cmp::Ordering::Equal => { // need the consensus head block. filter the synced rpcs - // TODO: this doesn't properly check the allow_backups variable! - for x in synced_connections - .conns - .iter() - .filter(|x| !skip.contains(x)) - { - let key = (None, u64::MAX - x.tier); + for x in synced_connections.rpcs.iter().filter(|x| !skip.contains(x)) { + // the key doesn't matter if we are checking synced connections. its already sized to what we need + let key = (0, None); m.entry(key).or_insert_with(Vec::new).push(x.clone()); } } cmp::Ordering::Greater => { - // TODO? if the blocks is close, wait for change on a watch_consensus_connections_receiver().subscribe() - return Ok(OpenRequestResult::NotReady(allow_backups)); + // TODO? if the blocks is close, maybe we could wait for change on a watch_consensus_connections_receiver().subscribe() + return Ok(OpenRequestResult::NotReady); } } @@ -540,42 +557,24 @@ impl Web3Rpcs { let mut earliest_retry_at = None; - for usable_rpcs in usable_rpcs_by_head_num_and_weight.into_values().rev() { - // we sort on a combination of values. cache them here so that we don't do this math multiple times. - // TODO: is this necessary if we use sort_by_cached_key? - let available_request_map: HashMap<_, f64> = usable_rpcs - .iter() - .map(|rpc| { - // TODO: weighted sort by remaining hard limit? - // TODO: weighted sort by soft_limit - ewma_active_requests? 
that assumes soft limits are any good - (rpc, 1.0) - }) - .collect(); - - warn!("todo: better sort here"); - - let sorted_rpcs = { - if usable_rpcs.len() == 1 { - // TODO: try the next tier - vec![usable_rpcs.get(0).expect("there should be 1")] - } else { - let mut rng = thread_fast_rng::thread_fast_rng(); - - usable_rpcs - .choose_multiple_weighted(&mut rng, usable_rpcs.len(), |rpc| { - *available_request_map - .get(rpc) - .expect("rpc should always be in available_request_map") - }) - .unwrap() - .collect::>() - } + for mut usable_rpcs in usable_rpcs_by_tier_and_head_number.into_values() { + // sort the tier randomly + if usable_rpcs.len() == 1 { + // TODO: include an rpc from the next tier? + } else { + // we can't get the rng outside of this loop because it is not Send + // this function should be pretty fast anyway, so it shouldn't matter too much + let mut rng = thread_fast_rng::thread_fast_rng(); + usable_rpcs.shuffle(&mut rng); }; - // now that the rpcs are sorted, try to get an active request handle for one of them - // TODO: pick two randomly and choose the one with the lower rpc.latency.ewma - for best_rpc in sorted_rpcs.into_iter() { - // increment our connection counter + // now that the rpcs are shuffled, try to get an active request handle for one of them + // pick the first two and try the one with the lower rpc.latency.ewma + // TODO: chunks or tuple windows? + for (rpc_a, rpc_b) in usable_rpcs.into_iter().circular_tuple_windows() { + let best_rpc = min_by_key(rpc_a, rpc_b, |x| x.latency.request_ewma); + + // just because it has lower latency doesn't mean we are sure to get a connection match best_rpc.try_request_handle(authorization, None).await { Ok(OpenRequestResult::Handle(handle)) => { // trace!("opened handle: {}", best_rpc); @@ -584,7 +583,7 @@ impl Web3Rpcs { Ok(OpenRequestResult::RetryAt(retry_at)) => { earliest_retry_at = earliest_retry_at.min(Some(retry_at)); } - Ok(OpenRequestResult::NotReady(_)) => { + Ok(OpenRequestResult::NotReady) => { // TODO: log a warning? emit a stat? } Err(err) => { @@ -614,7 +613,7 @@ impl Web3Rpcs { // TODO: should we log here? - Ok(OpenRequestResult::NotReady(allow_backups)) + Ok(OpenRequestResult::NotReady) } Some(earliest_retry_at) => { warn!("no servers on {:?}! {:?}", self, earliest_retry_at); @@ -676,19 +675,19 @@ impl Web3Rpcs { let mut max_count = if let Some(max_count) = max_count { max_count } else { - self.conns.len() + self.by_name.len() }; let mut tried = HashSet::new(); - let mut synced_conns = self.watch_consensus_rpcs_sender.borrow().conns.clone(); + let mut synced_conns = self.watch_consensus_rpcs_sender.borrow().rpcs.clone(); // synced connections are all on the same block. 
sort them by tier with higher soft limits first synced_conns.sort_by_cached_key(rpc_sync_status_sort_key); // if there aren't enough synced connections, include more connections // TODO: only do this sorting if the synced_conns isn't enough - let mut all_conns: Vec<_> = self.conns.values().cloned().collect(); + let mut all_conns: Vec<_> = self.by_name.values().cloned().collect(); all_conns.sort_by_cached_key(rpc_sync_status_sort_key); for connection in itertools::chain(synced_conns, all_conns) { @@ -728,7 +727,7 @@ impl Web3Rpcs { max_count -= 1; selected_rpcs.push(handle) } - Ok(OpenRequestResult::NotReady(_)) => { + Ok(OpenRequestResult::NotReady) => { warn!("no request handle for {}", connection) } Err(err) => { @@ -767,7 +766,7 @@ impl Web3Rpcs { loop { let num_skipped = skip_rpcs.len(); - if num_skipped == self.conns.len() { + if num_skipped == self.by_name.len() { break; } @@ -918,7 +917,7 @@ impl Web3Rpcs { } } } - OpenRequestResult::NotReady(backups_included) => { + OpenRequestResult::NotReady => { if let Some(request_metadata) = request_metadata { request_metadata.no_servers.fetch_add(1, Ordering::Release); } @@ -930,7 +929,7 @@ impl Web3Rpcs { if let Some(min_block_needed) = min_block_needed { let mut theres_a_chance = false; - for potential_conn in self.conns.values() { + for potential_conn in self.by_name.values() { if skip_rpcs.contains(potential_conn) { continue; } @@ -951,23 +950,10 @@ impl Web3Rpcs { } } - if backups_included { - // if NotReady and we tried backups, there's no chance - warn!("No servers ready even after checking backups"); - break; - } + debug!("No servers ready. Waiting up for change in synced servers"); - debug!("No servers ready. Waiting up to 1 second for change in synced servers"); - - // TODO: exponential backoff? - tokio::select! { - _ = sleep(Duration::from_secs(1)) => { - // do NOT pop the last rpc off skip here - } - _ = watch_consensus_connections.changed() => { - watch_consensus_connections.borrow_and_update(); - } - } + watch_consensus_connections.changed().await?; + watch_consensus_connections.borrow_and_update(); } } } @@ -984,7 +970,7 @@ impl Web3Rpcs { .store(true, Ordering::Release); } - let num_conns = self.conns.len(); + let num_conns = self.by_name.len(); let num_skipped = skip_rpcs.len(); if num_skipped == 0 { @@ -1135,7 +1121,7 @@ impl fmt::Debug for Web3Rpcs { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // TODO: the default formatter takes forever to write. 
this is too quiet though f.debug_struct("Web3Rpcs") - .field("conns", &self.conns) + .field("rpcs", &self.by_name) .finish_non_exhaustive() } } @@ -1147,8 +1133,8 @@ impl Serialize for Web3Rpcs { { let mut state = serializer.serialize_struct("Web3Rpcs", 6)?; - let conns: Vec<&Web3Rpc> = self.conns.values().map(|x| x.as_ref()).collect(); - state.serialize_field("conns", &conns)?; + let rpcs: Vec<&Web3Rpc> = self.by_name.values().map(|x| x.as_ref()).collect(); + state.serialize_field("rpcs", &rpcs)?; { let consensus_connections = self.watch_consensus_rpcs_sender.borrow().clone(); @@ -1218,7 +1204,7 @@ mod tests { let blocks: Vec<_> = [block_0, block_1, block_2] .into_iter() - .map(|x| Web3ProxyBlock::new(Arc::new(x))) + .map(|x| Web3ProxyBlock::try_new(Arc::new(x)).unwrap()) .collect(); let mut rpcs: Vec<_> = [ @@ -1303,8 +1289,8 @@ mod tests { let lagged_block = Arc::new(lagged_block); let head_block = Arc::new(head_block); - let mut lagged_block: Web3ProxyBlock = lagged_block.into(); - let mut head_block: Web3ProxyBlock = head_block.into(); + let mut lagged_block: Web3ProxyBlock = lagged_block.try_into().unwrap(); + let mut head_block: Web3ProxyBlock = head_block.try_into().unwrap(); let block_data_limit = u64::MAX; @@ -1341,7 +1327,7 @@ mod tests { let head_rpc = Arc::new(head_rpc); let lagged_rpc = Arc::new(lagged_rpc); - let conns = HashMap::from([ + let rpcs_by_name = HashMap::from([ (head_rpc.name.clone(), head_rpc.clone()), (lagged_rpc.name.clone(), lagged_rpc.clone()), ]); @@ -1349,8 +1335,8 @@ mod tests { let (watch_consensus_rpcs_sender, _) = watch::channel(Default::default()); // TODO: make a Web3Rpcs::new - let conns = Web3Rpcs { - conns, + let rpcs = Web3Rpcs { + by_name: rpcs_by_name, watch_consensus_head_receiver: None, watch_consensus_rpcs_sender, pending_transactions: Cache::builder() @@ -1376,38 +1362,33 @@ mod tests { let mut consensus_finder = ConsensusFinder::new(&[0, 1, 2, 3], None, None); // process None so that - conns - .process_block_from_rpc( - &authorization, - &mut consensus_finder, - None, - lagged_rpc.clone(), - &head_block_sender, - &None, - ) - .await - .expect( - "its lagged, but it should still be seen as consensus if its the first to report", - ); - conns - .process_block_from_rpc( - &authorization, - &mut consensus_finder, - None, - head_rpc.clone(), - &head_block_sender, - &None, - ) - .await - .unwrap(); + rpcs.process_block_from_rpc( + &authorization, + &mut consensus_finder, + None, + lagged_rpc.clone(), + &head_block_sender, + &None, + ) + .await + .expect("its lagged, but it should still be seen as consensus if its the first to report"); + rpcs.process_block_from_rpc( + &authorization, + &mut consensus_finder, + None, + head_rpc.clone(), + &head_block_sender, + &None, + ) + .await + .unwrap(); // no head block because the rpcs haven't communicated through their channels - assert!(conns.head_block_hash().is_none()); + assert!(rpcs.head_block_hash().is_none()); // all_backend_connections gives all non-backup servers regardless of sync status assert_eq!( - conns - .all_connections(&authorization, None, None, None, false) + rpcs.all_connections(&authorization, None, None, None, false) .await .unwrap() .len(), @@ -1415,87 +1396,80 @@ mod tests { ); // best_synced_backend_connection requires servers to be synced with the head block - let x = conns + let x = rpcs .best_consensus_head_connection(&authorization, None, &[], None, None) .await .unwrap(); dbg!(&x); - assert!(matches!(x, OpenRequestResult::NotReady(true))); + assert!(matches!(x, 
OpenRequestResult::NotReady)); - // add lagged blocks to the conns. both servers should be allowed - lagged_block = conns.try_cache_block(lagged_block, true).await.unwrap(); + // add lagged blocks to the rpcs. both servers should be allowed + lagged_block = rpcs.try_cache_block(lagged_block, true).await.unwrap(); - conns - .process_block_from_rpc( - &authorization, - &mut consensus_finder, - Some(lagged_block.clone()), - lagged_rpc, - &head_block_sender, - &None, - ) - .await - .unwrap(); - conns - .process_block_from_rpc( - &authorization, - &mut consensus_finder, - Some(lagged_block.clone()), - head_rpc.clone(), - &head_block_sender, - &None, - ) - .await - .unwrap(); + rpcs.process_block_from_rpc( + &authorization, + &mut consensus_finder, + Some(lagged_block.clone()), + lagged_rpc, + &head_block_sender, + &None, + ) + .await + .unwrap(); + rpcs.process_block_from_rpc( + &authorization, + &mut consensus_finder, + Some(lagged_block.clone()), + head_rpc.clone(), + &head_block_sender, + &None, + ) + .await + .unwrap(); - assert_eq!(conns.num_synced_rpcs(), 2); + assert_eq!(rpcs.num_synced_rpcs(), 2); - // add head block to the conns. lagged_rpc should not be available - head_block = conns.try_cache_block(head_block, true).await.unwrap(); + // add head block to the rpcs. lagged_rpc should not be available + head_block = rpcs.try_cache_block(head_block, true).await.unwrap(); - conns - .process_block_from_rpc( - &authorization, - &mut consensus_finder, - Some(head_block.clone()), - head_rpc, - &head_block_sender, - &None, - ) - .await - .unwrap(); + rpcs.process_block_from_rpc( + &authorization, + &mut consensus_finder, + Some(head_block.clone()), + head_rpc, + &head_block_sender, + &None, + ) + .await + .unwrap(); - assert_eq!(conns.num_synced_rpcs(), 1); + assert_eq!(rpcs.num_synced_rpcs(), 1); assert!(matches!( - conns - .best_consensus_head_connection(&authorization, None, &[], None, None) + rpcs.best_consensus_head_connection(&authorization, None, &[], None, None) .await, Ok(OpenRequestResult::Handle(_)) )); assert!(matches!( - conns - .best_consensus_head_connection(&authorization, None, &[], Some(&0.into()), None) + rpcs.best_consensus_head_connection(&authorization, None, &[], Some(&0.into()), None) .await, Ok(OpenRequestResult::Handle(_)) )); assert!(matches!( - conns - .best_consensus_head_connection(&authorization, None, &[], Some(&1.into()), None) + rpcs.best_consensus_head_connection(&authorization, None, &[], Some(&1.into()), None) .await, Ok(OpenRequestResult::Handle(_)) )); // future block should not get a handle assert!(matches!( - conns - .best_consensus_head_connection(&authorization, None, &[], Some(&2.into()), None) + rpcs.best_consensus_head_connection(&authorization, None, &[], Some(&2.into()), None) .await, - Ok(OpenRequestResult::NotReady(true)) + Ok(OpenRequestResult::NotReady) )); } @@ -1522,7 +1496,7 @@ mod tests { ..Default::default() }; - let head_block: Web3ProxyBlock = Arc::new(head_block).into(); + let head_block: Web3ProxyBlock = Arc::new(head_block).try_into().unwrap(); let pruned_rpc = Web3Rpc { name: "pruned".to_string(), @@ -1554,7 +1528,7 @@ mod tests { let pruned_rpc = Arc::new(pruned_rpc); let archive_rpc = Arc::new(archive_rpc); - let conns = HashMap::from([ + let rpcs_by_name = HashMap::from([ (pruned_rpc.name.clone(), pruned_rpc.clone()), (archive_rpc.name.clone(), archive_rpc.clone()), ]); @@ -1562,8 +1536,8 @@ mod tests { let (watch_consensus_rpcs_sender, _) = watch::channel(Default::default()); // TODO: make a Web3Rpcs::new - let conns = Web3Rpcs 
{ - conns, + let rpcs = Web3Rpcs { + by_name: rpcs_by_name, watch_consensus_head_receiver: None, watch_consensus_rpcs_sender, pending_transactions: Cache::builder() @@ -1576,7 +1550,7 @@ mod tests { .max_capacity(10) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), min_head_rpcs: 1, - min_sum_soft_limit: 3_000, + min_sum_soft_limit: 4_000, max_block_age: None, max_block_lag: None, }; @@ -1586,34 +1560,34 @@ mod tests { let (head_block_sender, _head_block_receiver) = watch::channel(Default::default()); let mut connection_heads = ConsensusFinder::new(&[0, 1, 2, 3], None, None); - conns - .process_block_from_rpc( - &authorization, - &mut connection_heads, - Some(head_block.clone()), - pruned_rpc.clone(), - &head_block_sender, - &None, - ) - .await - .unwrap(); - conns - .process_block_from_rpc( - &authorization, - &mut connection_heads, - Some(head_block.clone()), - archive_rpc.clone(), - &head_block_sender, - &None, - ) - .await - .unwrap(); + // min sum soft limit will require tier 2 + rpcs.process_block_from_rpc( + &authorization, + &mut connection_heads, + Some(head_block.clone()), + pruned_rpc.clone(), + &head_block_sender, + &None, + ) + .await + .unwrap_err(); - assert_eq!(conns.num_synced_rpcs(), 2); + rpcs.process_block_from_rpc( + &authorization, + &mut connection_heads, + Some(head_block.clone()), + archive_rpc.clone(), + &head_block_sender, + &None, + ) + .await + .unwrap(); + + assert_eq!(rpcs.num_synced_rpcs(), 2); // best_synced_backend_connection requires servers to be synced with the head block // TODO: test with and without passing the head_block.number? - let best_head_server = conns + let best_head_server = rpcs .best_consensus_head_connection( &authorization, None, @@ -1623,12 +1597,14 @@ mod tests { ) .await; + debug!("best_head_server: {:#?}", best_head_server); + assert!(matches!( best_head_server.unwrap(), OpenRequestResult::Handle(_) )); - let best_archive_server = conns + let best_archive_server = rpcs .best_consensus_head_connection(&authorization, None, &[], Some(&1.into()), None) .await; diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index 8b4decc4..5b030bad 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -41,6 +41,7 @@ pub struct Web3RpcLatencies { impl Default for Web3RpcLatencies { fn default() -> Self { + todo!("use ewma crate, not u32"); Self { new_head: Histogram::new(3).unwrap(), new_head_ewma: 0, @@ -525,7 +526,7 @@ impl Web3Rpc { None } Ok(Some(new_head_block)) => { - let new_head_block = Web3ProxyBlock::new(new_head_block); + let new_head_block = Web3ProxyBlock::try_new(new_head_block).unwrap(); let new_hash = *new_head_block.hash(); @@ -955,7 +956,7 @@ impl Web3Rpc { sleep_until(retry_at).await; } - Ok(OpenRequestResult::NotReady(_)) => { + Ok(OpenRequestResult::NotReady) => { // TODO: when can this happen? log? emit a stat? trace!("{} has no handle ready", self); @@ -987,7 +988,7 @@ impl Web3Rpc { if unlocked_provider.is_some() || self.provider.read().await.is_some() { // we already have an unlocked provider. 
no need to lock } else { - return Ok(OpenRequestResult::NotReady(self.backup)); + return Ok(OpenRequestResult::NotReady); } if let Some(hard_limit_until) = self.hard_limit_until.as_ref() { @@ -1029,7 +1030,7 @@ impl Web3Rpc { return Ok(OpenRequestResult::RetryAt(retry_at)); } RedisRateLimitResult::RetryNever => { - return Ok(OpenRequestResult::NotReady(self.backup)); + return Ok(OpenRequestResult::NotReady); } } }; @@ -1165,7 +1166,7 @@ mod tests { let random_block = Arc::new(random_block); - let head_block = Web3ProxyBlock::new(random_block); + let head_block = Web3ProxyBlock::try_new(random_block).unwrap(); let block_data_limit = u64::MAX; let x = Web3Rpc { @@ -1201,7 +1202,8 @@ mod tests { timestamp: now, ..Default::default() }) - .into(); + .try_into() + .unwrap(); let block_data_limit = 64; diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index 2c66307e..b3f4864a 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -20,9 +20,8 @@ pub enum OpenRequestResult { Handle(OpenRequestHandle), /// Unable to start a request. Retry at the given time. RetryAt(Instant), - /// Unable to start a request because the server is not synced - /// contains "true" if backup servers were attempted - NotReady(bool), + /// Unable to start a request because no servers are synced + NotReady, } /// Make RPC requests through this handle and drop it when you are done. From af7724e7946f5088d022658f632f721613faf68f Mon Sep 17 00:00:00 2001 From: yenicelik Date: Wed, 15 Feb 2023 14:54:52 +0100 Subject: [PATCH 25/47] using the webscript did the trick --- scripts/manual-tests/16-change-user-tier.sh | 32 ++++++++++++--------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/scripts/manual-tests/16-change-user-tier.sh b/scripts/manual-tests/16-change-user-tier.sh index 7e865207..3270e3e3 100644 --- a/scripts/manual-tests/16-change-user-tier.sh +++ b/scripts/manual-tests/16-change-user-tier.sh @@ -15,14 +15,23 @@ RUSTFLAGS="--cfg tokio_unstable" cargo run --release -- proxyd # Check if the instance is running curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"web3_clientVersion","id":1}' 127.0.0.1:8544 -# Login in the user first (add a random bearer token into the database) -# (This segment was not yet tested, but should next time you run the query) -INSERT INTO login (bearer_token, user_id, expires_at, read_only) VALUES ( - "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c", - 1, - "2222-01-01", - FALSE -); +# Open this website to get the nonce to log in +curl -X GET "http://127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a" + +# Use this site to sign a message +# https://www.myetherwallet.com/wallet/sign (whatever is output with the above code) +curl -X POST http://127.0.0.1:8544/user/login \ + -H 'Content-Type: application/json' \ + -d '{"address": "0xeb3e928a2e54be013ef8241d4c9eaf4dfae94d5a", "msg": 
"0x6c6c616d616e6f6465732e636f6d2077616e747320796f7520746f207369676e20696e207769746820796f757220457468657265756d206163636f756e743a0a3078654233453932384132453534424530313345463832343164344339456146344466414539344435610a0af09fa699f09fa699f09fa699f09fa699f09fa6990a0a5552493a2068747470733a2f2f6c6c616d616e6f6465732e636f6d2f0a56657273696f6e3a20310a436861696e2049443a20310a4e6f6e63653a2030314753414e37464d47574335314e50544737343338384a44350a4973737565642041743a20323032332d30322d31355431333a34363a33372e3037323739335a0a45787069726174696f6e2054696d653a20323032332d30322d31355431343a30363a33372e3037323739335a", "sig": "2d2eb576b2e6d05845710b7229f2a1ff9707e928fdcf571d1ce0ae094577e4310873fa1376c69440b60d6a1c76c62a4586b9d6426fb6559dee371e490d708f3e1b", "version": "3", "signer": "MEW"}' + +## Login in the user first (add a random bearer token into the database) +## (This segment was not yet tested, but should next time you run the query) +#INSERT INTO login (bearer_token, user_id, expires_at, read_only) VALUES ( +# "01GSAMZ6QY7KH9AQ", +# 1, +# "2024-01-01", +# FALSE +#); #curl -X POST -H "Content-Type: application/json" --data '{}' 127.0.0.1:8544/user/login #curl -X GET "127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a/" @@ -30,9 +39,6 @@ INSERT INTO login (bearer_token, user_id, expires_at, read_only) VALUES ( # Now modify the user role and check this in the database curl \ --H "Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9" \ --X GET "127.0.0.1:8544/admin/modify_role?user_address=0x077e43dcca20da9859daa3fd78b5998b81f794f7&user_tier_title=1&user_id=1" - -curl \ --H "Authorization: Bearer QWxhZGRpbjpvcGVuIHNlc2FtZQ==" \ +-H "Authorization: Bearer 01GSANKVBB22D5P2351P4Y42NV" \ -X GET "127.0.0.1:8544/admin/modify_role?user_address=0x077e43dcca20da9859daa3fd78b5998b81f794f7&user_tier_title=Unlimited&user_id=1" + From 36c884c9ab910818dcb8936e3c04b2c3c31f258c Mon Sep 17 00:00:00 2001 From: yenicelik Date: Wed, 15 Feb 2023 14:57:34 +0100 Subject: [PATCH 26/47] redis is not saving raw bearer tokens, so i will remove this from here --- web3_proxy/src/admin_queries.rs | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs index c86e0ba8..ff2b7841 100644 --- a/web3_proxy/src/admin_queries.rs +++ b/web3_proxy/src/admin_queries.rs @@ -108,24 +108,6 @@ pub async fn query_admin_modify_usertier<'a>( .all(db_replica.conn()) .await?; - // TODO: Remove from Redis - // Remove multiple items simultaneously, but this should be quick let's not prematurely optimize - let recent_user_id_key = format!("recent_users:id:{}", app.config.chain_id); - let salt = app - .config - .public_recent_ips_salt - .as_ref() - .expect("public_recent_ips_salt must exist in here"); - - // TODO: How do I remove the redis items (?) - for bearer_token in bearer_tokens { - let salted_user_id = format!("{}:{}", salt, bearer_token.user_id); - let hashed_user_id = Bytes::from(keccak256(salted_user_id.as_bytes())); - redis_conn - .zrem(&recent_user_id_key, hashed_user_id.to_string()) - .await?; - } - // Now delete these tokens ... login::Entity::delete_many() .filter(login::Column::UserId.eq(user.id)) From 44ae4971001dc5e7c4f86c05a0304fbc5d7d157d Mon Sep 17 00:00:00 2001 From: yenicelik Date: Wed, 15 Feb 2023 15:20:16 +0100 Subject: [PATCH 27/47] currently also have to specify the admin user calling the request. also added a tiny manual test to imitate user. 
will add trails next --- scripts/manual-tests/16-change-user-tier.sh | 2 +- scripts/manual-tests/19-admin-imitate-user.sh | 33 ++++++++++++------- web3_proxy/src/admin_queries.rs | 8 ++--- web3_proxy/src/frontend/admin.rs | 6 +--- web3_proxy/src/frontend/mod.rs | 4 +-- 5 files changed, 30 insertions(+), 23 deletions(-) diff --git a/scripts/manual-tests/16-change-user-tier.sh b/scripts/manual-tests/16-change-user-tier.sh index 3270e3e3..2505935b 100644 --- a/scripts/manual-tests/16-change-user-tier.sh +++ b/scripts/manual-tests/16-change-user-tier.sh @@ -1,5 +1,5 @@ -# docker-compose up -d # rm -rf data/ +# docker-compose up -d # sea-orm-cli migrate up # Use CLI to create the admin that will call the endpoint diff --git a/scripts/manual-tests/19-admin-imitate-user.sh b/scripts/manual-tests/19-admin-imitate-user.sh index 22777444..de20f3c5 100644 --- a/scripts/manual-tests/19-admin-imitate-user.sh +++ b/scripts/manual-tests/19-admin-imitate-user.sh @@ -1,27 +1,38 @@ -# Admin can login as a user ... (but again, we must first have logged in -# docker-compose up -d # rm -rf data/ +# docker-compose up -d # sea-orm-cli migrate up +# Use CLI to create the admin that will call the endpoint RUSTFLAGS="--cfg tokio_unstable" cargo run create_user --address 0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a RUSTFLAGS="--cfg tokio_unstable" cargo run change_admin_status 0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a true +# Use CLI to create the user whose role will be changed via the endpoint +RUSTFLAGS="--cfg tokio_unstable" cargo run create_user --address 0x077e43dcca20da9859daa3fd78b5998b81f794f7 + # Run the proxyd instance -# cargo run --release -- proxyd +RUSTFLAGS="--cfg tokio_unstable" cargo run --release -- proxyd # Check if the instance is running -# curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"web3_clientVersion","id":1}' 127.0.0.1:8544 +curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"web3_clientVersion","id":1}' 127.0.0.1:8544 -# Login as user first -curl -X GET "127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a" -#curl -X POST -H "Content-Type: application/json" --data '{}' 127.0.0.1:8544/user/login -curl -X GET "127.0.0.1:8544/user/login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a/" +# Open this website to get the nonce to log in +curl \ +-H "Authorization: Bearer 01GSANKVBB22D5P2351P4Y42NV" \ +-X GET "http://127.0.0.1:8544/admin/imitate-login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a/0x077e43dcca20da9859daa3fd78b5998b81f794f7" + +# Use this site to sign a message +# https://www.myetherwallet.com/wallet/sign (whatever is output with the above code) +curl -X POST http://127.0.0.1:8544/admin/imitate-login \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer 01GSANKVBB22D5P2351P4Y42NV" \ + -d '{"address": "0xeb3e928a2e54be013ef8241d4c9eaf4dfae94d5a", "msg": "0x6c6c616d616e6f6465732e636f6d2077616e747320796f7520746f207369676e20696e207769746820796f757220457468657265756d206163636f756e743a0a3078654233453932384132453534424530313345463832343164344339456146344466414539344435610a0af09fa699f09fa699f09fa699f09fa699f09fa6990a0a5552493a2068747470733a2f2f6c6c616d616e6f6465732e636f6d2f0a56657273696f6e3a20310a436861696e2049443a20310a4e6f6e63653a20303147534150545132413932415332435752563158504d4347470a4973737565642041743a20323032332d30322d31355431343a31343a33352e3835303636385a0a45787069726174696f6e2054696d653a20323032332d30322d31355431343a33343a33352e3835303636385a", "sig": 
"d5fed789e98769b8b726a79f222f2e06476de15948d35c167c4f294bb98edf42244edc703b6d729e5d08bd73c318fc9729b985022229c7669a945d64da47ab641c", "version": "3", "signer": "MEW"}' # Now modify the user role and check this in the database +# 01GSAMMWQ41TVVH3DH8MSEP8X6 # Now we can get a bearer-token to imitate the user -curl -X GET "127.0.0.1:8544/admin/imitate-login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a" -#curl -X POST -H "Content-Type: application/json" --data '{}' 127.0.0.1:8544/user/login -curl -X GET "127.0.0.1:8544/admin/imitate-login/0xeB3E928A2E54BE013EF8241d4C9EaF4DfAE94D5a/" +curl \ +-H "Authorization: Bearer 01GSAPZNVZ96ADJAEZ1VTRSA5T" \ +-X GET "127.0.0.1:8544/user/keys" # docker-compose down diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs index ff2b7841..15966188 100644 --- a/web3_proxy/src/admin_queries.rs +++ b/web3_proxy/src/admin_queries.rs @@ -65,7 +65,7 @@ pub async fn query_admin_modify_usertier<'a>( // Check if the caller is an admin (i.e. if he is in an admin table) let admin: admin::Model = admin::Entity::find() .filter(admin::Column::UserId.eq(caller_id)) - .one(db_replica.conn()) + .one(&db_conn) .await? .ok_or(FrontendErrorResponse::AccessDenied)?; @@ -74,7 +74,7 @@ pub async fn query_admin_modify_usertier<'a>( // Fetch the admin, and the user let user: user::Model = user::Entity::find() .filter(user::Column::Address.eq(user_address)) - .one(db_replica.conn()) + .one(&db_conn) .await? .ok_or(FrontendErrorResponse::BadRequest("No user with this id found".to_string()))?; // Return early if the target user_tier_id is the same as the original user_tier_id @@ -86,7 +86,7 @@ pub async fn query_admin_modify_usertier<'a>( // Now we can modify the user's tier let new_user_tier: user_tier::Model = user_tier::Entity::find() .filter(user_tier::Column::Title.eq(user_tier_title.clone())) - .one(db_replica.conn()) + .one(&db_conn) .await? .ok_or(FrontendErrorResponse::BadRequest("User Tier name was not found".to_string()))?; @@ -105,7 +105,7 @@ pub async fn query_admin_modify_usertier<'a>( // Query the login table, and get all bearer tokens by this user let bearer_tokens = login::Entity::find() .filter(login::Column::UserId.eq(user.id)) - .all(db_replica.conn()) + .all(&db_conn) .await?; // Now delete these tokens ... diff --git a/web3_proxy/src/frontend/admin.rs b/web3_proxy/src/frontend/admin.rs index 8ef18785..c0082f48 100644 --- a/web3_proxy/src/frontend/admin.rs +++ b/web3_proxy/src/frontend/admin.rs @@ -173,11 +173,7 @@ pub async fn admin_login_get( .filter(user::Column::Address.eq(user_address)) .one(db_replica.conn()) .await? - .ok_or(FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Could not find user in db".to_string(), - None, - ))?; + .ok_or(FrontendErrorResponse::BadRequest("Could not find user in db".to_string()))?; // Can there be two login-sessions at the same time? 
// I supposed if the user logs in, the admin would be logged out and vice versa diff --git a/web3_proxy/src/frontend/mod.rs b/web3_proxy/src/frontend/mod.rs index 2ca61893..3c209fae 100644 --- a/web3_proxy/src/frontend/mod.rs +++ b/web3_proxy/src/frontend/mod.rs @@ -169,9 +169,9 @@ pub async fn serve(port: u16, proxy_app: Arc) -> anyhow::Result<() .route("/user/stats/detailed", get(users::user_stats_detailed_get)) .route("/user/logout", post(users::user_logout_post)) .route("/admin/modify_role", get(admin::admin_change_user_roles)) - .route("/admin/imitate-login/:user_address", get(admin::admin_login_get)) + .route("/admin/imitate-login/:admin_address/:user_address", get(admin::admin_login_get)) .route( - "/admin/imitate-login/:user_address/:message_eip", + "/admin/imitate-login/:admin_address/:user_address/:message_eip", get(admin::admin_login_get), ) .route("/admin/imitate-login", post(admin::admin_login_post)) From 46f715d3ccfb8b0fa914282a61b922e0b6abacaf Mon Sep 17 00:00:00 2001 From: yenicelik Date: Wed, 15 Feb 2023 16:12:21 +0100 Subject: [PATCH 28/47] added admin trail table --- migration/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migration/src/lib.rs b/migration/src/lib.rs index 7c0cbe0f..fe7e3ec6 100644 --- a/migration/src/lib.rs +++ b/migration/src/lib.rs @@ -16,6 +16,7 @@ mod m20230117_191358_admin_table; mod m20230119_204135_better_free_tier; mod m20230130_124740_read_only_login_logic; mod m20230130_165144_prepare_admin_imitation_pre_login; +mod m20230215_152254_admin_trail; pub struct Migrator; @@ -39,6 +40,7 @@ impl MigratorTrait for Migrator { Box::new(m20230119_204135_better_free_tier::Migration), Box::new(m20230130_124740_read_only_login_logic::Migration), Box::new(m20230130_165144_prepare_admin_imitation_pre_login::Migration), + Box::new(m20230215_152254_admin_trail::Migration), ] } } From 499610add7b696a939c5d949d0f490d27696bc7c Mon Sep 17 00:00:00 2001 From: yenicelik Date: Wed, 15 Feb 2023 16:29:30 +0100 Subject: [PATCH 29/47] added admin trail table --- entities/src/admin_trail.rs | 37 ++++++++ entities/src/mod.rs | 1 + entities/src/prelude.rs | 2 + migration/src/m20230215_152254_admin_trail.rs | 93 +++++++++++++++++++ 4 files changed, 133 insertions(+) create mode 100644 entities/src/admin_trail.rs create mode 100644 migration/src/m20230215_152254_admin_trail.rs diff --git a/entities/src/admin_trail.rs b/entities/src/admin_trail.rs new file mode 100644 index 00000000..26ad1e3b --- /dev/null +++ b/entities/src/admin_trail.rs @@ -0,0 +1,37 @@ +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.10.6 + +use sea_orm::entity::prelude::*; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] +#[sea_orm(table_name = "admin_trail")] +pub struct Model { + #[sea_orm(primary_key)] + pub id: i32, + pub caller: u64, + pub imitating_user: Option, + pub endpoint: String, + pub payload: String, + pub timestamp: DateTimeUtc, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::user::Entity", + from = "Column::Caller", + to = "super::user::Column::Id", + on_update = "NoAction", + on_delete = "NoAction" + )] + User2, + #[sea_orm( + belongs_to = "super::user::Entity", + from = "Column::ImitatingUser", + to = "super::user::Column::Id", + on_update = "NoAction", + on_delete = "NoAction" + )] + User1, +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/entities/src/mod.rs b/entities/src/mod.rs index f0ac2711..2121477c 100644 --- a/entities/src/mod.rs +++ b/entities/src/mod.rs @@ -3,6 +3,7 @@ pub mod prelude; pub mod admin; +pub mod admin_trail; pub mod login; pub mod pending_login; pub mod revert_log; diff --git a/entities/src/prelude.rs b/entities/src/prelude.rs index 218a3d47..bb19388d 100644 --- a/entities/src/prelude.rs +++ b/entities/src/prelude.rs @@ -1,5 +1,7 @@ //! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +pub use super::admin::Entity as Admin; +pub use super::admin_trail::Entity as AdminTrail; pub use super::login::Entity as Login; pub use super::pending_login::Entity as PendingLogin; pub use super::revert_log::Entity as RevertLog; diff --git a/migration/src/m20230215_152254_admin_trail.rs b/migration/src/m20230215_152254_admin_trail.rs new file mode 100644 index 00000000..994361ee --- /dev/null +++ b/migration/src/m20230215_152254_admin_trail.rs @@ -0,0 +1,93 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .create_table( + Table::create() + .table(AdminTrail::Table) + .if_not_exists() + .col( + ColumnDef::new(AdminTrail::Id) + .integer() + .not_null() + .auto_increment() + .primary_key(), + ) + .col( + ColumnDef::new(AdminTrail::Caller) + .big_unsigned() + .not_null() + // TODO: Add Foreign Key + ) + .foreign_key( + sea_query::ForeignKey::create() + .from(AdminTrail::Table, AdminTrail::Caller) + .to(User::Table, User::Id), + ) + .col( + ColumnDef::new(AdminTrail::ImitatingUser) + .big_unsigned() + // Can be null bcs maybe we're just logging in / using endpoints that don't imitate a user + // TODO: Add Foreign Key + ) + .foreign_key( + sea_query::ForeignKey::create() + .from(AdminTrail::Table, AdminTrail::ImitatingUser) + .to(User::Table, User::Id), + ) + .col( + ColumnDef::new(AdminTrail::Endpoint) + .string() + .not_null() + ) + .col( + ColumnDef::new(AdminTrail::Payload) + .string() + .not_null() + ) + .col( + ColumnDef::new(AdminTrail::Timestamp) + .timestamp() + .not_null() + .extra("DEFAULT CURRENT_TIMESTAMP".to_string()) + ) + .to_owned(), + ) + .await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + // Replace the sample below with your own migration scripts + manager + .drop_table(Table::drop().table(AdminTrail::Table).to_owned()) + .await + } +} + +/// Learn more at https://docs.rs/sea-query#iden +#[derive(Iden)] +enum AdminTrail { + Table, + Id, + Caller, + ImitatingUser, + Endpoint, + Payload, + Timestamp +} + + +/// Learn more at 
https://docs.rs/sea-query#iden +#[derive(Iden)] +enum User { + Table, + Id, + Address, + Description, + Email, +} \ No newline at end of file From d6a7b702f88a8fa751d68fd468596aa03a64633f Mon Sep 17 00:00:00 2001 From: yenicelik Date: Wed, 15 Feb 2023 17:10:45 +0100 Subject: [PATCH 30/47] added admin trail when someone tries to login for godmode --- web3_proxy/src/admin_queries.rs | 3 ++ web3_proxy/src/frontend/admin.rs | 84 +++++++++++++++++--------------- 2 files changed, 49 insertions(+), 38 deletions(-) diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs index 15966188..0f8de6e1 100644 --- a/web3_proxy/src/admin_queries.rs +++ b/web3_proxy/src/admin_queries.rs @@ -55,6 +55,9 @@ pub async fn query_admin_modify_usertier<'a>( .context("query_admin_modify_user had a redis connection error")? .context("query_admin_modify_user needs a redis")?; + // Will modify logic here + + // Try to get the user who is calling from redis (if existent) / else from the database // TODO: Make a single query, where you retrieve the user, and directly from it the secondary user (otherwise we do two jumpy, which is unnecessary) // get the user id first. if it is 0, we should use a cache on the app diff --git a/web3_proxy/src/frontend/admin.rs b/web3_proxy/src/frontend/admin.rs index c0082f48..286c1d44 100644 --- a/web3_proxy/src/frontend/admin.rs +++ b/web3_proxy/src/frontend/admin.rs @@ -21,8 +21,8 @@ use axum_client_ip::InsecureClientIp; use axum_macros::debug_handler; use chrono::{TimeZone, Utc}; use entities::sea_orm_active_enums::{LogLevel, Role}; -use entities::{admin, login, pending_login, revert_log, rpc_key, secondary_user, user, user_tier}; -use ethers::{prelude::Address, types::Bytes}; +use entities::{admin, admin_trail, login, pending_login, revert_log, rpc_key, secondary_user, user, user_tier}; +use ethers::{abi::AbiEncode, prelude::Address, types::Bytes}; use hashbrown::HashMap; use http::{HeaderValue, StatusCode}; use ipnet::IpNet; @@ -60,7 +60,7 @@ pub async fn admin_change_user_roles( Ok(response) } -/// `GET /admin/login/:user_address` -- Being an admin, login as a user in read-only mode +/// `GET /admin/imitate-login/:admin_address/:user_address` -- Being an admin, login as a user in read-only mode /// /// - user_address that is to be logged in by /// We assume that the admin has already logged in, and has a bearer token ... @@ -95,41 +95,17 @@ pub async fn admin_login_get( // get the admin field ... let admin_address: Address = params .get("admin_address") - .ok_or_else(|| - FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to find admin_address key in request".to_string(), - None, - ) - )? + .ok_or_else(|| FrontendErrorResponse::BadRequest("Unable to find admin_address key in request".to_string()))? .parse::
() - .map_err(|err| { - FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to parse user_address as an Address".to_string(), - Some(err.into()) - ) - })?; + .map_err(|err| { FrontendErrorResponse::BadRequest("Unable to parse user_address as an Address".to_string()) })?; // Fetch the user_address parameter from the login string ... (as who we want to be logging in ...) let user_address: Vec = params .get("user_address") - .ok_or_else(|| - FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to find user_address key in request".to_string(), - None, - ) - )? + .ok_or_else(|| FrontendErrorResponse::BadRequest("Unable to find user_address key in request".to_string()))? .parse::
() - .map_err(|err| { - FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to parse user_address as an Address".to_string(), - Some(err.into()), - ) - })? + .map_err(|err| { FrontendErrorResponse::BadRequest("Unable to parse user_address as an Address".to_string(), ) })? .to_fixed_bytes().into(); // We want to login to llamanodes.com @@ -149,7 +125,7 @@ pub async fn admin_login_get( // TODO: accept a login_domain from the request? domain: login_domain.parse().unwrap(), // In the case of the admin, the admin needs to sign the message, so we include this logic ... - address: admin_address.to_fixed_bytes(),// user_address.to_fixed_bytes(), + address: admin_address.to_fixed_bytes(), // user_address.to_fixed_bytes(), // TODO: config for statement statement: Some("🦙🦙🦙🦙🦙".to_string()), // TODO: don't unwrap @@ -175,6 +151,25 @@ pub async fn admin_login_get( .await? .ok_or(FrontendErrorResponse::BadRequest("Could not find user in db".to_string()))?; + let admin = user::Entity::find() + .filter(user::Column::Address.eq(admin_address.encode())) + .one(db_replica.conn()) + .await? + .ok_or(FrontendErrorResponse::BadRequest("Could not find admin in db".to_string()))?; + + // Note that the admin is trying to log in as this user + let trail = admin_trail::ActiveModel { + caller: sea_orm::Set(admin.id), + imitating_user: sea_orm::Set(Some(user.id)), + endpoint: sea_orm::Set("admin_login_get".to_string()), + payload: sea_orm::Set(format!("{:?}", params)), + ..Default::default() + }; + trail + .save(&db_conn) + .await + .context("saving user's pending_login")?; + // Can there be two login-sessions at the same time? // I supposed if the user logs in, the admin would be logged out and vice versa @@ -198,7 +193,7 @@ pub async fn admin_login_get( user_pending_login .save(&db_conn) .await - .context("saving user's pending_login")?; + .context("saving an admin trail pre login")?; // there are multiple ways to sign messages and not all wallets support them // TODO: default message eip from config? @@ -284,6 +279,10 @@ pub async fn admin_login_post( // default options are fine. the message includes timestamp and domain and nonce let verify_config = VerificationOpts::default(); + let db_conn = app + .db_conn() + .context("deleting expired pending logins requires a db")?; + if let Err(err_1) = our_msg .verify(&their_sig, &verify_config) .await @@ -294,9 +293,6 @@ pub async fn admin_login_post( .verify_eip191(&their_sig) .context("verifying eip191 signature against our local message") { - let db_conn = app - .db_conn() - .context("deleting expired pending logins requires a db")?; // delete ALL expired rows. let now = Utc::now(); @@ -335,6 +331,20 @@ pub async fn admin_login_post( .await? .context("admin address was not found!")?; + // Add a message that the admin has logged in + // Note that the admin is trying to log in as this user + let trail = admin_trail::ActiveModel { + caller: sea_orm::Set(admin.id), + imitating_user: sea_orm::Set(Some(imitating_user.id)), + endpoint: sea_orm::Set("admin_login_post".to_string()), + payload: sea_orm::Set(format!("{:?}", payload)), + ..Default::default() + }; + trail + .save(&db_conn) + .await + .context("saving an admin trail post login")?; + // I supposed we also get the rpc_key, whatever this is used for (?). // I think the RPC key should still belong to the admin though in this case ... 
@@ -381,8 +391,6 @@ pub async fn admin_login_post( read_only: sea_orm::Set(true) }; - let db_conn = app.db_conn().context("Getting database connection")?; - user_login .save(&db_conn) .await From 5fd440523f390644524204fc4e99ee273f12f786 Mon Sep 17 00:00:00 2001 From: yenicelik Date: Wed, 15 Feb 2023 17:13:31 +0100 Subject: [PATCH 31/47] removed one comment (not needed to remove redis) --- .../web3_proxy_cli/change_user_admin_status.rs | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs b/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs index 1ee46659..081cd8a0 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/change_user_admin_status.rs @@ -71,22 +71,6 @@ impl ChangeUserAdminStatusSubCommand { .all(db_conn) .await?; - // // TODO: Remove from Redis - // // Remove multiple items simultaneously, but this should be quick let's not prematurely optimize - // let recent_user_id_key = format!("recent_users:id:{}", app.config.chain_id); - // let salt = app - // .config - // .public_recent_ips_salt - // .as_ref() - // .expect("public_recent_ips_salt must exist in here"); - // - // // TODO: Also clear redis ... - // let salted_user_id = format!("{}:{}", salt, bearer_token.user_id); - // let hashed_user_id = Bytes::from(keccak256(salted_user_id.as_bytes())); - // redis_conn - // .zrem(&recent_user_id_key, hashed_user_id.to_string()) - // .await?; - // Remove any user logins from the database (incl. bearer tokens) let delete_result = login::Entity::delete_many() .filter(login::Column::UserId.eq(user.id)) From 8506cd6bdcce4676510885ddc1ed22af81bcc803 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 11:40:55 -0800 Subject: [PATCH 32/47] cargo upgrade --- Cargo.lock | 17 +++++++++++++++++ web3_proxy/Cargo.toml | 2 ++ 2 files changed, 19 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 48d61556..f2b417c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1803,6 +1803,12 @@ version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" +[[package]] +name = "ewma" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f20267f3a8b678b7151c0c508002e79126144a5d47badddec7f31ddc1f4c754" + [[package]] name = "eyre" version = "0.6.8" @@ -3128,6 +3134,15 @@ dependencies = [ "syn", ] +[[package]] +name = "ordered-float" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d84eb1409416d254e4a9c8fa56cc24701755025b458f0fcd8e59e1f5f40c23bf" +dependencies = [ + "num-traits", +] + [[package]] name = "os_info" version = "3.6.0" @@ -5770,6 +5785,7 @@ dependencies = [ "entities", "env_logger", "ethers", + "ewma", "fdlimit", "flume", "futures", @@ -5788,6 +5804,7 @@ dependencies = [ "num", "num-traits", "once_cell", + "ordered-float", "pagerduty-rs", "parking_lot 0.12.1", "prettytable", diff --git a/web3_proxy/Cargo.toml b/web3_proxy/Cargo.toml index 7d8ab888..04da5704 100644 --- a/web3_proxy/Cargo.toml +++ b/web3_proxy/Cargo.toml @@ -74,3 +74,5 @@ tower-http = { version = "0.3.5", features = ["cors", "sensitive-headers"] } ulid = { version = "1.0.0", features = ["serde"] } url = "2.3.1" uuid = "1.3.0" +ewma = "0.1.1" +ordered-float = "3.4.0" From df668a5dfdee1d90013ba2edce0ce2a2e398869a Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 11:42:25 -0800 
Subject: [PATCH 33/47] add time to idle to more caches --- web3_proxy/src/app/mod.rs | 9 ++++++++- web3_proxy/src/rpcs/many.rs | 22 ++++++++++------------ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/web3_proxy/src/app/mod.rs b/web3_proxy/src/app/mod.rs index bcb54937..303ea187 100644 --- a/web3_proxy/src/app/mod.rs +++ b/web3_proxy/src/app/mod.rs @@ -556,9 +556,12 @@ impl Web3ProxyApp { // TODO: ttl on this? or is max_capacity fine? let pending_transactions = Cache::builder() .max_capacity(10_000) + // TODO: different chains might handle this differently + // TODO: what should we set? 5 minutes is arbitrary. the nodes themselves hold onto transactions for much longer + .time_to_idle(Duration::from_secs(300)) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); - // keep 1GB of blocks in the cache + // keep 1GB/5 minutes of blocks in the cache // TODO: limits from config // these blocks don't have full transactions, but they do have rather variable amounts of transaction hashes // TODO: how can we do the weigher better? @@ -568,6 +571,8 @@ impl Web3ProxyApp { // TODO: is this good enough? 1 + v.block.transactions.len().try_into().unwrap_or(u32::MAX) }) + // TODO: what should we set? 5 minutes is arbitrary. the nodes themselves hold onto transactions for much longer + .time_to_idle(Duration::from_secs(300)) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); // connect to the load balanced rpcs @@ -690,6 +695,8 @@ impl Web3ProxyApp { u32::MAX } }) + // TODO: what should we set? 10 minutes is arbitrary. the nodes themselves hold onto transactions for much longer + .time_to_idle(Duration::from_secs(600)) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); // all the users are the same size, so no need for a weigher diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index c449b241..19958016 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -20,6 +20,7 @@ use itertools::Itertools; use log::{debug, error, info, trace, warn, Level}; use migration::sea_orm::DatabaseConnection; use moka::future::{Cache, ConcurrentCacheExt}; +use ordered_float::OrderedFloat; use serde::ser::{SerializeStruct, Serializer}; use serde::Serialize; use serde_json::json; @@ -572,7 +573,9 @@ impl Web3Rpcs { // pick the first two and try the one with the lower rpc.latency.ewma // TODO: chunks or tuple windows? for (rpc_a, rpc_b) in usable_rpcs.into_iter().circular_tuple_windows() { - let best_rpc = min_by_key(rpc_a, rpc_b, |x| x.latency.request_ewma); + let best_rpc = min_by_key(rpc_a, rpc_b, |x| { + OrderedFloat(x.request_latency.read().ewma.value()) + }); // just because it has lower latency doesn't mean we are sure to get a connection match best_rpc.try_request_handle(authorization, None).await { @@ -1154,18 +1157,19 @@ impl Serialize for Web3Rpcs { /// sort by block number (descending) and tier (ascending) /// TODO: should this be moved into a `impl Web3Rpc`? +/// TODO: i think we still have sorts scattered around the code that should use this /// TODO: take AsRef or something like that? 
We don't need an Arc here -fn rpc_sync_status_sort_key(x: &Arc) -> (u64, u64, u32) { - let reversed_head_block = u64::MAX +fn rpc_sync_status_sort_key(x: &Arc) -> (U64, u64, OrderedFloat) { + let reversed_head_block = U64::MAX - x.head_block .read() .as_ref() - .map(|x| x.number().as_u64()) - .unwrap_or(0); + .map(|x| *x.number()) + .unwrap_or_default(); let tier = x.tier; - let request_ewma = x.latency.request_ewma; + let request_ewma = OrderedFloat(x.request_latency.read().ewma.value()); (reversed_head_block, tier, request_ewma) } @@ -1340,13 +1344,10 @@ mod tests { watch_consensus_head_receiver: None, watch_consensus_rpcs_sender, pending_transactions: Cache::builder() - .max_capacity(10_000) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), block_hashes: Cache::builder() - .max_capacity(10_000) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), block_numbers: Cache::builder() - .max_capacity(10_000) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), // TODO: test max_block_age? max_block_age: None, @@ -1541,13 +1542,10 @@ mod tests { watch_consensus_head_receiver: None, watch_consensus_rpcs_sender, pending_transactions: Cache::builder() - .max_capacity(10) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), block_hashes: Cache::builder() - .max_capacity(10) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), block_numbers: Cache::builder() - .max_capacity(10) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()), min_head_rpcs: 1, min_sum_soft_limit: 4_000, From 1fb4dd6ccc021ee22c92091c395e95c9198387c1 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 12:33:43 -0800 Subject: [PATCH 34/47] i think it works --- web3_proxy/src/rpcs/blockchain.rs | 30 ++++++-- web3_proxy/src/rpcs/consensus.rs | 88 ++++++++++++++++-------- web3_proxy/src/rpcs/many.rs | 5 +- web3_proxy/src/rpcs/one.rs | 109 +++++++++++++++++++++--------- web3_proxy/src/rpcs/request.rs | 12 ++++ 5 files changed, 174 insertions(+), 70 deletions(-) diff --git a/web3_proxy/src/rpcs/blockchain.rs b/web3_proxy/src/rpcs/blockchain.rs index 9aa018a0..cd8957f5 100644 --- a/web3_proxy/src/rpcs/blockchain.rs +++ b/web3_proxy/src/rpcs/blockchain.rs @@ -415,6 +415,8 @@ impl Web3Rpcs { // TODO: what should we do if the block number of new_synced_connections is < old_synced_connections? wait? + let consensus_tier = new_synced_connections.tier; + let total_tiers = consensus_finder.len(); let backups_needed = new_synced_connections.backups_needed; let consensus_head_block = new_synced_connections.head_block.clone(); let num_consensus_rpcs = new_synced_connections.num_conns(); @@ -434,7 +436,9 @@ impl Web3Rpcs { match &old_consensus_head_connections.head_block { None => { debug!( - "first {}{}/{}/{} block={}, rpc={}", + "first {}/{} {}{}/{}/{} block={}, rpc={}", + consensus_tier, + total_tiers, backups_voted_str, num_consensus_rpcs, num_active_rpcs, @@ -469,7 +473,9 @@ impl Web3Rpcs { // no change in hash. 
no need to use head_block_sender // TODO: trace level if rpc is backup debug!( - "con {}{}/{}/{} con={} rpc={}@{}", + "con {}/{} {}{}/{}/{} con={} rpc={}@{}", + consensus_tier, + total_tiers, backups_voted_str, num_consensus_rpcs, num_active_rpcs, @@ -486,7 +492,9 @@ impl Web3Rpcs { } debug!( - "unc {}{}/{}/{} con_head={} old={} rpc={}@{}", + "unc {}/{} {}{}/{}/{} con_head={} old={} rpc={}@{}", + consensus_tier, + total_tiers, backups_voted_str, num_consensus_rpcs, num_active_rpcs, @@ -511,7 +519,9 @@ impl Web3Rpcs { // this is unlikely but possible // TODO: better log warn!( - "chain rolled back {}{}/{}/{} con={} old={} rpc={}@{}", + "chain rolled back {}/{} {}{}/{}/{} con={} old={} rpc={}@{}", + consensus_tier, + total_tiers, backups_voted_str, num_consensus_rpcs, num_active_rpcs, @@ -541,7 +551,9 @@ impl Web3Rpcs { } Ordering::Greater => { debug!( - "new {}{}/{}/{} con={} rpc={}@{}", + "new {}/{} {}{}/{}/{} con={} rpc={}@{}", + consensus_tier, + total_tiers, backups_voted_str, num_consensus_rpcs, num_active_rpcs, @@ -573,7 +585,9 @@ impl Web3Rpcs { if num_active_rpcs >= self.min_head_rpcs { // no consensus!!! error!( - "non {}{}/{}/{} rpc={}@{}", + "non {}/{} {}{}/{}/{} rpc={}@{}", + consensus_tier, + total_tiers, backups_voted_str, num_consensus_rpcs, num_active_rpcs, @@ -584,7 +598,9 @@ impl Web3Rpcs { } else { // no consensus, but we do not have enough rpcs connected yet to panic debug!( - "non {}{}/{}/{} rpc={}@{}", + "non {}/{} {}{}/{}/{} rpc={}@{}", + consensus_tier, + total_tiers, backups_voted_str, num_consensus_rpcs, num_active_rpcs, diff --git a/web3_proxy/src/rpcs/consensus.rs b/web3_proxy/src/rpcs/consensus.rs index 847892cf..62901b59 100644 --- a/web3_proxy/src/rpcs/consensus.rs +++ b/web3_proxy/src/rpcs/consensus.rs @@ -7,15 +7,18 @@ use anyhow::Context; use ethers::prelude::{H256, U64}; use hashbrown::{HashMap, HashSet}; use log::{debug, trace, warn}; +use moka::future::Cache; use serde::Serialize; use std::collections::BTreeMap; use std::fmt; use std::sync::Arc; +use tokio::time::Instant; /// A collection of Web3Rpcs that are on the same block. /// Serialize is so we can print it on our debug endpoint #[derive(Clone, Default, Serialize)] pub struct ConsensusWeb3Rpcs { + pub(super) tier: u64, pub(super) head_block: Option, // TODO: this should be able to serialize, but it isn't #[serde(skip_serializing)] @@ -74,22 +77,25 @@ impl Web3Rpcs { } } +type FirstSeenCache = Cache; + pub struct ConnectionsGroup { rpc_name_to_block: HashMap, // TODO: what if there are two blocks with the same number? highest_block: Option, -} - -impl Default for ConnectionsGroup { - fn default() -> Self { - Self { - rpc_name_to_block: Default::default(), - highest_block: Default::default(), - } - } + /// used to track rpc.head_latency. 
The same cache should be shared between all ConnectionsGroups + first_seen: FirstSeenCache, } impl ConnectionsGroup { + pub fn new(first_seen: FirstSeenCache) -> Self { + Self { + rpc_name_to_block: Default::default(), + highest_block: Default::default(), + first_seen, + } + } + pub fn len(&self) -> usize { self.rpc_name_to_block.len() } @@ -115,7 +121,17 @@ impl ConnectionsGroup { } } - fn insert(&mut self, rpc: &Web3Rpc, block: Web3ProxyBlock) -> Option { + async fn insert(&mut self, rpc: &Web3Rpc, block: Web3ProxyBlock) -> Option { + let first_seen = self + .first_seen + .get_with(*block.hash(), async move { Instant::now() }) + .await; + + // TODO: this should be 0 if we are first seen, but i think it will be slightly non-zero + rpc.head_latency + .write() + .record(first_seen.elapsed().as_secs_f64() * 1000.0); + // TODO: what about a reorg to the same height? if Some(block.number()) > self.highest_block.as_ref().map(|x| x.number()) { self.highest_block = Some(block.clone()); @@ -179,6 +195,7 @@ impl ConnectionsGroup { authorization: &Arc, web3_rpcs: &Web3Rpcs, min_consensus_block_num: Option, + tier: &u64, ) -> anyhow::Result { let mut maybe_head_block = match self.highest_block.clone() { None => return Err(anyhow::anyhow!("no blocks known")), @@ -191,13 +208,18 @@ impl ConnectionsGroup { if let Some(min_consensus_block_num) = min_consensus_block_num { maybe_head_block .number() + .saturating_add(1.into()) .saturating_sub(min_consensus_block_num) .as_u64() } else { - // TODO: get from app config? different chains probably should have different values. 10 is probably too much 10 }; + trace!( + "max_lag_consensus_to_highest: {}", + max_lag_consensus_to_highest + ); + let num_known = self.rpc_name_to_block.len(); if num_known < web3_rpcs.min_head_rpcs { @@ -338,7 +360,7 @@ impl ConnectionsGroup { } // success! this block has enough soft limit and nodes on it (or on later blocks) - let conns: Vec> = primary_consensus_rpcs + let rpcs: Vec> = primary_consensus_rpcs .into_iter() .filter_map(|conn_name| web3_rpcs.by_name.get(conn_name).cloned()) .collect(); @@ -349,8 +371,9 @@ impl ConnectionsGroup { let _ = maybe_head_block.number(); Ok(ConsensusWeb3Rpcs { + tier: *tier, head_block: Some(maybe_head_block), - rpcs: conns, + rpcs, backups_voted: backup_rpcs_voted, backups_needed: primary_rpcs_voted.is_none(), }) @@ -377,10 +400,15 @@ impl ConsensusFinder { max_block_age: Option, max_block_lag: Option, ) -> Self { + // TODO: what's a good capacity for this? 
+ let first_seen = Cache::builder() + .max_capacity(16) + .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); + // TODO: this will need some thought when config reloading is written let tiers = configured_tiers .iter() - .map(|x| (*x, Default::default())) + .map(|x| (*x, ConnectionsGroup::new(first_seen.clone()))) .collect(); Self { @@ -389,9 +417,11 @@ impl ConsensusFinder { max_block_lag, } } -} -impl ConsensusFinder { + pub fn len(&self) -> usize { + self.tiers.len() + } + /// get the ConnectionsGroup that contains all rpcs /// panics if there are no tiers pub fn all_rpcs_group(&self) -> Option<&ConnectionsGroup> { @@ -421,7 +451,11 @@ impl ConsensusFinder { } /// returns the block that the rpc was on before updating to the new_block - pub fn insert(&mut self, rpc: &Web3Rpc, new_block: Web3ProxyBlock) -> Option { + pub async fn insert( + &mut self, + rpc: &Web3Rpc, + new_block: Web3ProxyBlock, + ) -> Option { let mut old = None; // TODO: error if rpc.tier is not in self.tiers @@ -432,7 +466,7 @@ impl ConsensusFinder { } // TODO: should new_block be a ref? - let x = tier_group.insert(rpc, new_block.clone()); + let x = tier_group.insert(rpc, new_block.clone()).await; if old.is_none() && x.is_some() { old = x; @@ -473,7 +507,7 @@ impl ConsensusFinder { } } - if let Some(prev_block) = self.insert(&rpc, rpc_head_block.clone()) { + if let Some(prev_block) = self.insert(&rpc, rpc_head_block.clone()).await { if prev_block.hash() == rpc_head_block.hash() { // this block was already sent by this rpc. return early false @@ -527,13 +561,13 @@ impl ConsensusFinder { // TODO: how should errors be handled? // TODO: find the best tier with a connectionsgroup. best case, this only queries the first tier // TODO: do we need to calculate all of them? I think having highest_known_block included as part of min_block_num should make that unnecessary - for (i, x) in self.tiers.iter() { - trace!("checking tier {}: {:#?}", i, x.rpc_name_to_block); + for (tier, x) in self.tiers.iter() { + trace!("checking tier {}: {:#?}", tier, x.rpc_name_to_block); if let Ok(consensus_head_connections) = x - .consensus_head_connections(authorization, web3_connections, min_block_num) + .consensus_head_connections(authorization, web3_connections, min_block_num, tier) .await { - trace!("success on tier {}", i); + trace!("success on tier {}", tier); // we got one! hopefully it didn't need to use any backups. 
// but even if it did need backup servers, that is better than going to a worse tier return Ok(consensus_head_connections); @@ -546,8 +580,8 @@ impl ConsensusFinder { #[cfg(test)] mod test { - #[test] - fn test_simplest_case_consensus_head_connections() { - todo!(); - } + // #[test] + // fn test_simplest_case_consensus_head_connections() { + // todo!(); + // } } diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index 19958016..4a4d1995 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -458,10 +458,7 @@ impl Web3Rpcs { max_block_needed )) } - cmp::Ordering::Less => { - // hmmmm - todo!("now what do we do?"); - } + cmp::Ordering::Less => min_block_needed.cmp(head_block_num), } } }; diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index 5b030bad..8bc94243 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -21,33 +21,74 @@ use serde_json::json; use std::cmp::min; use std::fmt; use std::hash::{Hash, Hasher}; -use std::sync::atomic::{self, AtomicU64}; +use std::sync::atomic::{self, AtomicU64, AtomicUsize}; use std::{cmp::Ordering, sync::Arc}; use thread_fast_rng::rand::Rng; use thread_fast_rng::thread_fast_rng; use tokio::sync::{broadcast, oneshot, watch, RwLock as AsyncRwLock}; use tokio::time::{sleep, sleep_until, timeout, Duration, Instant}; -pub struct Web3RpcLatencies { - /// Traack how far behind the fastest node we are - pub new_head: Histogram, - /// exponentially weighted moving average of how far behind the fastest node we are - pub new_head_ewma: u32, - /// Track how long an rpc call takes on average - pub request: Histogram, - /// exponentially weighted moving average of how far behind the fastest node we are - pub request_ewma: u32, +pub struct Latency { + /// Track how many milliseconds slower we are than the fastest node + pub histogram: Histogram, + /// exponentially weighted moving average of how many milliseconds behind the fastest node we are + pub ewma: ewma::EWMA, } -impl Default for Web3RpcLatencies { +impl Serialize for Latency { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut state = serializer.serialize_struct("latency", 6)?; + + state.serialize_field("ewma_ms", &self.ewma.value())?; + + state.serialize_field("histogram_len", &self.histogram.len())?; + state.serialize_field("mean_ms", &self.histogram.mean())?; + state.serialize_field("p50_ms", &self.histogram.value_at_quantile(0.50))?; + state.serialize_field("p75_ms", &self.histogram.value_at_quantile(0.75))?; + state.serialize_field("p99_ms", &self.histogram.value_at_quantile(0.99))?; + + state.end() + } +} + +impl Latency { + pub fn record(&mut self, milliseconds: f64) { + self.ewma.add(milliseconds); + + // histogram needs ints and not floats + self.histogram.record(milliseconds as u64).unwrap(); + } +} + +impl Default for Latency { fn default() -> Self { - todo!("use ewma crate, not u32"); - Self { - new_head: Histogram::new(3).unwrap(), - new_head_ewma: 0, - request: Histogram::new(3).unwrap(), - request_ewma: 0, - } + // TODO: what should the default sigfig be? + let sigfig = 0; + + // TODO: what should the default span be? 25 requests? 
have a "new" + let span = 25.0; + + Self::new(sigfig, span).expect("default histogram sigfigs should always work") + } +} + +impl Latency { + pub fn new(sigfig: u8, span: f64) -> Result { + let alpha = Self::span_to_alpha(span); + + let histogram = Histogram::new(sigfig)?; + + Ok(Self { + histogram, + ewma: ewma::EWMA::new(alpha), + }) + } + + fn span_to_alpha(span: f64) -> f64 { + 2.0 / (span + 1.0) } } @@ -83,8 +124,13 @@ pub struct Web3Rpc { pub(super) tier: u64, /// TODO: change this to a watch channel so that http providers can subscribe and take action on change. pub(super) head_block: RwLock>, - /// Track how fast this RPC is - pub(super) latency: Web3RpcLatencies, + /// Track head block latency + pub(super) head_latency: RwLock, + /// Track request latency + pub(super) request_latency: RwLock, + /// Track total requests served + /// TODO: maybe move this to graphana + pub(super) total_requests: AtomicUsize, } impl Web3Rpc { @@ -1081,7 +1127,7 @@ impl Serialize for Web3Rpc { S: Serializer, { // 3 is the number of fields in the struct. - let mut state = serializer.serialize_struct("Web3Rpc", 9)?; + let mut state = serializer.serialize_struct("Web3Rpc", 10)?; // the url is excluded because it likely includes private information. just show the name that we use in keys state.serialize_field("name", &self.name)?; @@ -1103,17 +1149,17 @@ impl Serialize for Web3Rpc { state.serialize_field("soft_limit", &self.soft_limit)?; - // TODO: keep this for the "popularity_contest" command? or maybe better to just use graphana? - // state.serialize_field( - // "frontend_requests", - // &self.frontend_requests.load(atomic::Ordering::Relaxed), - // )?; + // TODO: maybe this is too much data. serialize less? + state.serialize_field("head_block", &*self.head_block.read())?; - { - // TODO: maybe this is too much data. serialize less? - let head_block = &*self.head_block.read(); - state.serialize_field("head_block", head_block)?; - } + state.serialize_field("head_latency", &*self.head_latency.read())?; + + state.serialize_field("request_latency", &*self.request_latency.read())?; + + state.serialize_field( + "total_requests", + &self.total_requests.load(atomic::Ordering::Relaxed), + )?; state.end() } @@ -1207,7 +1253,6 @@ mod tests { let block_data_limit = 64; - // TODO: this is getting long. have a `impl Default` let x = Web3Rpc { name: "name".to_string(), soft_limit: 1_000, diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index b3f4864a..7a2d735d 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -183,6 +183,12 @@ impl OpenRequestHandle { let provider = provider.expect("provider was checked already"); + self.rpc + .total_requests + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); + + let start = Instant::now(); + // TODO: replace ethers-rs providers with our own that supports streaming the responses let response = match provider.as_ref() { #[cfg(test)] @@ -367,6 +373,12 @@ impl OpenRequestHandle { tokio::spawn(f); } } + } else { + // TODO: locking now will slow us down. 
send latency into a channel instead + self.rpc + .request_latency + .write() + .record(start.elapsed().as_secs_f64() * 1000.0); } response From 774bd5b2328ccf6412585bd988ee3e7c171bbb70 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 12:59:03 -0800 Subject: [PATCH 35/47] undo a rename in the json --- web3_proxy/src/rpcs/many.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index 4a4d1995..33e03177 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -1134,7 +1134,8 @@ impl Serialize for Web3Rpcs { let mut state = serializer.serialize_struct("Web3Rpcs", 6)?; let rpcs: Vec<&Web3Rpc> = self.by_name.values().map(|x| x.as_ref()).collect(); - state.serialize_field("rpcs", &rpcs)?; + // TODO: coordinate with frontend team to rename "conns" to "rpcs" + state.serialize_field("conns", &rpcs)?; { let consensus_connections = self.watch_consensus_rpcs_sender.borrow().clone(); From 056670d29aa1f6361dca29c54e3f1fae801b5ca2 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 15:31:59 -0800 Subject: [PATCH 36/47] health check and latency recordings --- web3_proxy/src/rpcs/one.rs | 94 ++++++++++++++++++++++++++++++---- web3_proxy/src/rpcs/request.rs | 11 ++-- 2 files changed, 88 insertions(+), 17 deletions(-) diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index 8bc94243..7603f7e6 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -5,9 +5,10 @@ use super::request::{OpenRequestHandle, OpenRequestResult}; use crate::app::{flatten_handle, AnyhowJoinHandle}; use crate::config::{BlockAndRpc, Web3RpcConfig}; use crate::frontend::authorization::Authorization; +use crate::rpcs::request::RequestRevertHandler; use anyhow::{anyhow, Context}; use ethers::prelude::{Bytes, Middleware, ProviderError, TxHash, H256, U64}; -use ethers::types::U256; +use ethers::types::{Transaction, U256}; use futures::future::try_join_all; use futures::StreamExt; use hdrhistogram::Histogram; @@ -128,6 +129,7 @@ pub struct Web3Rpc { pub(super) head_latency: RwLock, /// Track request latency pub(super) request_latency: RwLock, + pub(super) request_latency_sender: Option>, /// Track total requests served /// TODO: maybe move this to graphana pub(super) total_requests: AtomicUsize, @@ -215,6 +217,9 @@ impl Web3Rpc { } } + // TODO: what max capacity? + let (request_latency_sender, request_latency_receiver) = flume::bounded(10_000); + let new_connection = Self { name, db_conn: db_conn.clone(), @@ -229,6 +234,7 @@ impl Web3Rpc { backup, block_data_limit, tier: config.tier, + request_latency_sender: Some(request_latency_sender), ..Default::default() }; @@ -248,6 +254,7 @@ impl Web3Rpc { block_sender, chain_id, http_interval_sender, + request_latency_receiver, reconnect, tx_id_sender, ) @@ -634,9 +641,18 @@ impl Web3Rpc { block_sender: Option>, chain_id: u64, http_interval_sender: Option>>, + request_latency_receiver: flume::Receiver, reconnect: bool, tx_id_sender: Option)>>, ) -> anyhow::Result<()> { + let request_latency_receiver = Arc::new(request_latency_receiver); + + let revert_handler = if self.backup { + RequestRevertHandler::DebugLevel + } else { + RequestRevertHandler::ErrorLevel + }; + loop { let http_interval_receiver = http_interval_sender.as_ref().map(|x| x.subscribe()); @@ -662,24 +678,63 @@ impl Web3Rpc { // provider is ready ready_tx.send(()).unwrap(); - // TODO: how often? - // TODO: reset this timeout every time a new block is seen + // TODO: how often? 
different depending on the chain? + // TODO: reset this timeout when a new block is seen? we need to keep request_latency updated though let health_sleep_seconds = 10; - // wait before doing the initial health check - sleep(Duration::from_secs(health_sleep_seconds)).await; + let mut old_total_requests = 0; + let mut new_total_requests; loop { + sleep(Duration::from_secs(health_sleep_seconds)).await; + // TODO: what if we just happened to have this check line up with another restart? // TODO: think more about this if let Some(client) = &*conn.provider.read().await { - // trace!("health check unlocked with error on {}", conn); - // returning error will trigger a reconnect - // also, do the health check as a way of keeping this rpc's request_ewma accurate - // TODO: do a query of some kind - } + // health check as a way of keeping this rpc's request_ewma accurate + // TODO: do something different if this is a backup server? - sleep(Duration::from_secs(health_sleep_seconds)).await; + new_total_requests = + conn.total_requests.load(atomic::Ordering::Relaxed); + + if new_total_requests - old_total_requests < 10 { + // TODO: if this fails too many times, reset the connection + let head_block = conn.head_block.read().clone(); + + if let Some((block_hash, txid)) = head_block.and_then(|x| { + let block = x.block.clone(); + + let block_hash = block.hash?; + let txid = block.transactions.last().cloned()?; + + Some((block_hash, txid)) + }) { + let authorization = authorization.clone(); + let conn = conn.clone(); + let x = async move { + conn.try_request_handle(&authorization, None).await + }; + + if let Ok(OpenRequestResult::Handle(x)) = x.await { + if let Ok(Some(x)) = x + .request::<_, Option>( + "eth_getTransactionByHash", + &txid, + revert_handler, + None, + ) + .await + { + // TODO: make this flatter + // TODO: do more (fair, not random) things here + // let = x.request("eth_getCode", (tx.to.unwrap_or(Address::zero()), block_hash), RequestRevertHandler::ErrorLevel, Some(client.clone())) + } + } + } + } + + old_total_requests = new_total_requests; + } } }; @@ -708,6 +763,23 @@ impl Web3Rpc { futures.push(flatten_handle(tokio::spawn(f))); } + { + let conn = self.clone(); + let request_latency_receiver = request_latency_receiver.clone(); + + let f = async move { + while let Ok(latency) = request_latency_receiver.recv_async().await { + conn.request_latency + .write() + .record(latency.as_secs_f64() * 1000.0); + } + + Ok(()) + }; + + futures.push(flatten_handle(tokio::spawn(f))); + } + match try_join_all(futures).await { Ok(_) => { // futures all exited without error. break instead of restarting subscriptions diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index 7a2d735d..a4897579 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -33,6 +33,7 @@ pub struct OpenRequestHandle { } /// Depending on the context, RPC errors can require different handling. +#[derive(Copy, Clone)] pub enum RequestRevertHandler { /// Log at the trace level. Use when errors are expected. TraceLevel, @@ -373,12 +374,10 @@ impl OpenRequestHandle { tokio::spawn(f); } } - } else { - // TODO: locking now will slow us down. send latency into a channel instead - self.rpc - .request_latency - .write() - .record(start.elapsed().as_secs_f64() * 1000.0); + } else if let Some(x) = self.rpc.request_latency_sender.as_ref() { + if let Err(err) = x.send(start.elapsed()) { + error!("no request latency sender! 
{:#?}", err); + } } response From 268ad3a7c56a1ea50a02465ce6cf8b530a4bf7fa Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 15:37:43 -0800 Subject: [PATCH 37/47] dont hold a lock open --- web3_proxy/src/rpcs/one.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index 7603f7e6..be8f9312 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -690,7 +690,7 @@ impl Web3Rpc { // TODO: what if we just happened to have this check line up with another restart? // TODO: think more about this - if let Some(client) = &*conn.provider.read().await { + if let Some(client) = conn.provider.read().await.clone() { // health check as a way of keeping this rpc's request_ewma accurate // TODO: do something different if this is a backup server? @@ -711,11 +711,13 @@ impl Web3Rpc { }) { let authorization = authorization.clone(); let conn = conn.clone(); - let x = async move { - conn.try_request_handle(&authorization, None).await - }; - if let Ok(OpenRequestResult::Handle(x)) = x.await { + let x = async move { + conn.try_request_handle(&authorization, Some(client)).await + } + .await; + + if let Ok(OpenRequestResult::Handle(x)) = x { if let Ok(Some(x)) = x .request::<_, Option>( "eth_getTransactionByHash", From 63adbc05872d6692e1cfcf44b5712a67b99e7d44 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 15:42:27 -0800 Subject: [PATCH 38/47] remove health for now? --- web3_proxy/src/rpcs/one.rs | 58 +++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index be8f9312..8fc1b656 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -699,40 +699,40 @@ impl Web3Rpc { if new_total_requests - old_total_requests < 10 { // TODO: if this fails too many times, reset the connection - let head_block = conn.head_block.read().clone(); + // let head_block = conn.head_block.read().clone(); - if let Some((block_hash, txid)) = head_block.and_then(|x| { - let block = x.block.clone(); + // if let Some((block_hash, txid)) = head_block.and_then(|x| { + // let block = x.block.clone(); - let block_hash = block.hash?; - let txid = block.transactions.last().cloned()?; + // let block_hash = block.hash?; + // let txid = block.transactions.last().cloned()?; - Some((block_hash, txid)) - }) { - let authorization = authorization.clone(); - let conn = conn.clone(); + // Some((block_hash, txid)) + // }) { + // let authorization = authorization.clone(); + // let conn = conn.clone(); - let x = async move { - conn.try_request_handle(&authorization, Some(client)).await - } - .await; + // let x = async move { + // conn.try_request_handle(&authorization, Some(client)).await + // } + // .await; - if let Ok(OpenRequestResult::Handle(x)) = x { - if let Ok(Some(x)) = x - .request::<_, Option>( - "eth_getTransactionByHash", - &txid, - revert_handler, - None, - ) - .await - { - // TODO: make this flatter - // TODO: do more (fair, not random) things here - // let = x.request("eth_getCode", (tx.to.unwrap_or(Address::zero()), block_hash), RequestRevertHandler::ErrorLevel, Some(client.clone())) - } - } - } + // if let Ok(OpenRequestResult::Handle(x)) = x { + // if let Ok(Some(x)) = x + // .request::<_, Option>( + // "eth_getTransactionByHash", + // &txid, + // revert_handler, + // None, + // ) + // .await + // { + // // TODO: make this flatter + // // TODO: do more (fair, not random) things here + // // 
let = x.request("eth_getCode", (tx.to.unwrap_or(Address::zero()), block_hash), RequestRevertHandler::ErrorLevel, Some(client.clone())) + // } + // } + // } } old_total_requests = new_total_requests; From 3c9576c13bc9421f8c94b423e645e287c9628639 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 15:52:42 -0800 Subject: [PATCH 39/47] yes health, no channel --- web3_proxy/src/rpcs/one.rs | 84 ++++++++++++---------------------- web3_proxy/src/rpcs/request.rs | 10 ++-- 2 files changed, 35 insertions(+), 59 deletions(-) diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index 8fc1b656..b4d6cebb 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -129,7 +129,6 @@ pub struct Web3Rpc { pub(super) head_latency: RwLock, /// Track request latency pub(super) request_latency: RwLock, - pub(super) request_latency_sender: Option>, /// Track total requests served /// TODO: maybe move this to graphana pub(super) total_requests: AtomicUsize, @@ -217,9 +216,6 @@ impl Web3Rpc { } } - // TODO: what max capacity? - let (request_latency_sender, request_latency_receiver) = flume::bounded(10_000); - let new_connection = Self { name, db_conn: db_conn.clone(), @@ -234,7 +230,6 @@ impl Web3Rpc { backup, block_data_limit, tier: config.tier, - request_latency_sender: Some(request_latency_sender), ..Default::default() }; @@ -254,7 +249,6 @@ impl Web3Rpc { block_sender, chain_id, http_interval_sender, - request_latency_receiver, reconnect, tx_id_sender, ) @@ -641,12 +635,9 @@ impl Web3Rpc { block_sender: Option>, chain_id: u64, http_interval_sender: Option>>, - request_latency_receiver: flume::Receiver, reconnect: bool, tx_id_sender: Option)>>, ) -> anyhow::Result<()> { - let request_latency_receiver = Arc::new(request_latency_receiver); - let revert_handler = if self.backup { RequestRevertHandler::DebugLevel } else { @@ -699,40 +690,40 @@ impl Web3Rpc { if new_total_requests - old_total_requests < 10 { // TODO: if this fails too many times, reset the connection - // let head_block = conn.head_block.read().clone(); + let head_block = conn.head_block.read().clone(); - // if let Some((block_hash, txid)) = head_block.and_then(|x| { - // let block = x.block.clone(); + if let Some((block_hash, txid)) = head_block.and_then(|x| { + let block = x.block.clone(); - // let block_hash = block.hash?; - // let txid = block.transactions.last().cloned()?; + let block_hash = block.hash?; + let txid = block.transactions.last().cloned()?; - // Some((block_hash, txid)) - // }) { - // let authorization = authorization.clone(); - // let conn = conn.clone(); + Some((block_hash, txid)) + }) { + let authorization = authorization.clone(); + let conn = conn.clone(); - // let x = async move { - // conn.try_request_handle(&authorization, Some(client)).await - // } - // .await; + let x = async move { + conn.try_request_handle(&authorization, Some(client)).await + } + .await; - // if let Ok(OpenRequestResult::Handle(x)) = x { - // if let Ok(Some(x)) = x - // .request::<_, Option>( - // "eth_getTransactionByHash", - // &txid, - // revert_handler, - // None, - // ) - // .await - // { - // // TODO: make this flatter - // // TODO: do more (fair, not random) things here - // // let = x.request("eth_getCode", (tx.to.unwrap_or(Address::zero()), block_hash), RequestRevertHandler::ErrorLevel, Some(client.clone())) - // } - // } - // } + if let Ok(OpenRequestResult::Handle(x)) = x { + if let Ok(Some(x)) = x + .request::<_, Option>( + "eth_getTransactionByHash", + &txid, + revert_handler, + None, + ) + 
.await + { + // TODO: make this flatter + // TODO: do more (fair, not random) things here + // let = x.request("eth_getCode", (tx.to.unwrap_or(Address::zero()), block_hash), RequestRevertHandler::ErrorLevel, Some(client.clone())) + } + } + } } old_total_requests = new_total_requests; @@ -765,23 +756,6 @@ impl Web3Rpc { futures.push(flatten_handle(tokio::spawn(f))); } - { - let conn = self.clone(); - let request_latency_receiver = request_latency_receiver.clone(); - - let f = async move { - while let Ok(latency) = request_latency_receiver.recv_async().await { - conn.request_latency - .write() - .record(latency.as_secs_f64() * 1000.0); - } - - Ok(()) - }; - - futures.push(flatten_handle(tokio::spawn(f))); - } - match try_join_all(futures).await { Ok(_) => { // futures all exited without error. break instead of restarting subscriptions diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index a4897579..11383b08 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -374,10 +374,12 @@ impl OpenRequestHandle { tokio::spawn(f); } } - } else if let Some(x) = self.rpc.request_latency_sender.as_ref() { - if let Err(err) = x.send(start.elapsed()) { - error!("no request latency sender! {:#?}", err); - } + } else { + let latency_ms = start.elapsed().as_secs_f64() * 1000.0; + + let mut latency_recording = self.rpc.request_latency.write(); + + latency_recording.record(latency_ms); } response From bc306f62d494e0f623f02e12427f385e7c7ac820 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 21:05:41 -0800 Subject: [PATCH 40/47] make it work --- web3_proxy/src/rpcs/many.rs | 139 +++++++++++++++++------------------- 1 file changed, 66 insertions(+), 73 deletions(-) diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index 33e03177..cc671ab7 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -420,7 +420,7 @@ impl Web3Rpcs { unimplemented!("this shouldn't be possible") } - pub async fn best_consensus_head_connection( + pub async fn best_available_rpc( &self, authorization: &Arc, request_metadata: Option<&Arc>, @@ -436,8 +436,6 @@ impl Web3Rpcs { if let Some(head_block) = synced_connections.head_block.as_ref() { (head_block.number(), head_block.age()) } else { - // TODO: optionally wait for a head_block.number() >= min_block_needed - // TODO: though i think that wait would actually need to be earlier in the request return Ok(OpenRequestResult::NotReady); }; @@ -463,6 +461,8 @@ impl Web3Rpcs { } }; + trace!("needed_blocks_comparison: {:?}", needed_blocks_comparison); + // collect "usable_rpcs_by_head_num_and_weight" // TODO: MAKE SURE None SORTS LAST? let mut m = BTreeMap::new(); @@ -470,6 +470,7 @@ impl Web3Rpcs { match needed_blocks_comparison { cmp::Ordering::Less => { // need an old block. check all the rpcs. ignore rpcs that are still syncing + trace!("old block needed"); let min_block_age = self.max_block_age.map(|x| head_block_age.saturating_sub(x)); @@ -517,12 +518,14 @@ impl Web3Rpcs { // TODO: do we really need to check head_num and age? if let Some(min_sync_num) = min_sync_num.as_ref() { if x_head_num < min_sync_num { + trace!("rpc is still syncing"); continue; } } if let Some(min_block_age) = min_block_age { - if x_head.age() < min_block_age { + if x_head.age() > min_block_age { // rpc is still syncing + trace!("block is too old"); continue; } } @@ -536,12 +539,22 @@ impl Web3Rpcs { // TODO: check min_synced_rpcs and min_sum_soft_limits? or maybe better to just try to serve the request? 
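One way to read the bucketing in this hunk: the `m` BTreeMap groups candidate rpcs under a sort key, and the selection loop later walks `into_values()` in key order, trying the better buckets before falling back. A minimal std-only sketch of that idea, with made-up rpc names and a simplified `(blocks_behind, tier)` key (the patch's real keys also deal with `Option` values and head-block deltas):

use std::collections::BTreeMap;

fn main() {
    // smaller is better on both axes here, so BTreeMap iteration order
    // visits the most attractive bucket first
    let mut buckets: BTreeMap<(u64, u64), Vec<&str>> = BTreeMap::new();

    buckets.entry((0, 1)).or_insert_with(Vec::new).push("rpc_a");
    buckets.entry((0, 2)).or_insert_with(Vec::new).push("rpc_b");
    buckets.entry((3, 1)).or_insert_with(Vec::new).push("lagging_rpc");

    for ((behind, tier), rpcs) in &buckets {
        println!("{} blocks behind, tier {}: {:?}", behind, tier, rpcs);
    }
}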
} cmp::Ordering::Equal => { - // need the consensus head block. filter the synced rpcs - for x in synced_connections.rpcs.iter().filter(|x| !skip.contains(x)) { - // the key doesn't matter if we are checking synced connections. its already sized to what we need - let key = (0, None); + // using the consensus head block. filter the synced rpcs - m.entry(key).or_insert_with(Vec::new).push(x.clone()); + // the key doesn't matter if we are checking synced connections + // they are all at the same block and it is already sized to what we need + let key = (0, None); + + for x in synced_connections.rpcs.iter() { + if skip.contains(x) { + trace!("skipping: {}", x); + continue; + } + trace!("not skipped!"); + + m.entry(key.clone()) + .or_insert_with(Vec::new) + .push(x.clone()); } } cmp::Ordering::Greater => { @@ -553,6 +566,11 @@ impl Web3Rpcs { m }; + trace!( + "usable_rpcs_by_tier_and_head_number: {:#?}", + usable_rpcs_by_tier_and_head_number + ); + let mut earliest_retry_at = None; for mut usable_rpcs in usable_rpcs_by_tier_and_head_number.into_values() { @@ -570,9 +588,13 @@ impl Web3Rpcs { // pick the first two and try the one with the lower rpc.latency.ewma // TODO: chunks or tuple windows? for (rpc_a, rpc_b) in usable_rpcs.into_iter().circular_tuple_windows() { + trace!("{} vs {}", rpc_a, rpc_b); + // TODO: cached key to save a read lock + // TODO: ties to the server with the smallest block_data_limit let best_rpc = min_by_key(rpc_a, rpc_b, |x| { OrderedFloat(x.request_latency.read().ewma.value()) }); + trace!("winner: {}", best_rpc); // just because it has lower latency doesn't mean we are sure to get a connection match best_rpc.try_request_handle(authorization, None).await { @@ -585,6 +607,7 @@ impl Web3Rpcs { } Ok(OpenRequestResult::NotReady) => { // TODO: log a warning? emit a stat? + trace!("best_rpc not ready"); } Err(err) => { warn!("No request handle for {}. err={:?}", best_rpc, err) @@ -771,7 +794,7 @@ impl Web3Rpcs { } match self - .best_consensus_head_connection( + .best_available_rpc( authorization, request_metadata, &skip_rpcs, @@ -922,65 +945,34 @@ impl Web3Rpcs { request_metadata.no_servers.fetch_add(1, Ordering::Release); } - // todo!( - // "check if we are requesting an old block and no archive servers are synced" - // ); - - if let Some(min_block_needed) = min_block_needed { - let mut theres_a_chance = false; - - for potential_conn in self.by_name.values() { - if skip_rpcs.contains(potential_conn) { - continue; - } - - // TODO: should we instead check if has_block_data but with the current head block? - if potential_conn.has_block_data(min_block_needed) { - trace!("chance for {} on {}", min_block_needed, potential_conn); - theres_a_chance = true; - break; - } - - skip_rpcs.push(potential_conn.clone()); - } - - if !theres_a_chance { - debug!("no chance of finding data in block #{}", min_block_needed); - break; - } - } - - debug!("No servers ready. Waiting up for change in synced servers"); - - watch_consensus_connections.changed().await?; - watch_consensus_connections.borrow_and_update(); + break; } } } - if let Some(r) = method_not_available_response { - // TODO: emit a stat for unsupported methods? - return Ok(r); - } - - // TODO: do we need this here, or do we do it somewhere else? + // TODO: do we need this here, or do we do it somewhere else? 
like, the code could change and a try operator in here would skip this increment if let Some(request_metadata) = request_metadata { request_metadata .error_response .store(true, Ordering::Release); } + if let Some(r) = method_not_available_response { + // TODO: emit a stat for unsupported methods? it would be best to block them at the proxy instead of at the backend + return Ok(r); + } + let num_conns = self.by_name.len(); let num_skipped = skip_rpcs.len(); if num_skipped == 0 { - error!("No servers synced ({} known)", num_conns); + error!("No servers synced ({} known). None skipped", num_conns); - return Ok(JsonRpcForwardedResponse::from_str( + Ok(JsonRpcForwardedResponse::from_str( "No servers synced", Some(-32000), Some(request.id), - )); + )) } else { // TODO: warn? debug? trace? warn!( @@ -990,11 +982,11 @@ impl Web3Rpcs { // TODO: what error code? // cloudflare gives {"jsonrpc":"2.0","error":{"code":-32043,"message":"Requested data cannot be older than 128 blocks."},"id":1} - return Ok(JsonRpcForwardedResponse::from_str( + Ok(JsonRpcForwardedResponse::from_str( "Requested data is not available", Some(-32043), Some(request.id), - )); + )) } } @@ -1396,7 +1388,7 @@ mod tests { // best_synced_backend_connection requires servers to be synced with the head block let x = rpcs - .best_consensus_head_connection(&authorization, None, &[], None, None) + .best_available_rpc(&authorization, None, &[], None, None) .await .unwrap(); @@ -1447,29 +1439,28 @@ mod tests { assert_eq!(rpcs.num_synced_rpcs(), 1); assert!(matches!( - rpcs.best_consensus_head_connection(&authorization, None, &[], None, None) + rpcs.best_available_rpc(&authorization, None, &[], None, None) .await, Ok(OpenRequestResult::Handle(_)) )); assert!(matches!( - rpcs.best_consensus_head_connection(&authorization, None, &[], Some(&0.into()), None) + rpcs.best_available_rpc(&authorization, None, &[], Some(&0.into()), None) .await, Ok(OpenRequestResult::Handle(_)) )); assert!(matches!( - rpcs.best_consensus_head_connection(&authorization, None, &[], Some(&1.into()), None) + rpcs.best_available_rpc(&authorization, None, &[], Some(&1.into()), None) .await, Ok(OpenRequestResult::Handle(_)) )); // future block should not get a handle - assert!(matches!( - rpcs.best_consensus_head_connection(&authorization, None, &[], Some(&2.into()), None) - .await, - Ok(OpenRequestResult::NotReady) - )); + let future_rpc = rpcs + .best_available_rpc(&authorization, None, &[], Some(&2.into()), None) + .await; + assert!(matches!(future_rpc, Ok(OpenRequestResult::NotReady))); } #[tokio::test] @@ -1505,6 +1496,7 @@ mod tests { block_data_limit: 64.into(), tier: 1, head_block: RwLock::new(Some(head_block.clone())), + provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))), ..Default::default() }; @@ -1516,6 +1508,7 @@ mod tests { block_data_limit: u64::MAX.into(), tier: 2, head_block: RwLock::new(Some(head_block.clone())), + provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))), ..Default::default() }; @@ -1583,25 +1576,25 @@ mod tests { // best_synced_backend_connection requires servers to be synced with the head block // TODO: test with and without passing the head_block.number? 
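A quick note on the `matches!` assertions used in these tests: they check only which variant came back, not the payload, so the tests stay independent of the handle internals. A tiny self-contained illustration with a stand-in enum (the real code matches on `Ok(OpenRequestResult::Handle(_))` and `Ok(OpenRequestResult::NotReady)`):

enum RequestResult {
    Handle(u64),
    NotReady,
}

fn main() {
    let ready: Result<RequestResult, ()> = Ok(RequestResult::Handle(42));
    assert!(matches!(ready, Ok(RequestResult::Handle(_))));

    let future_block: Result<RequestResult, ()> = Ok(RequestResult::NotReady);
    assert!(matches!(future_block, Ok(RequestResult::NotReady)));

    println!("variant-only assertions passed");
}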
- let best_head_server = rpcs - .best_consensus_head_connection( - &authorization, - None, - &[], - Some(&head_block.number()), - None, - ) + let best_available_server = rpcs + .best_available_rpc(&authorization, None, &[], Some(&head_block.number()), None) .await; - debug!("best_head_server: {:#?}", best_head_server); + debug!("best_available_server: {:#?}", best_available_server); assert!(matches!( - best_head_server.unwrap(), + best_available_server.unwrap(), OpenRequestResult::Handle(_) )); + let best_available_server_from_none = rpcs + .best_available_rpc(&authorization, None, &[], None, None) + .await; + + // assert_eq!(best_available_server, best_available_server_from_none); + let best_archive_server = rpcs - .best_consensus_head_connection(&authorization, None, &[], Some(&1.into()), None) + .best_available_rpc(&authorization, None, &[], Some(&1.into()), None) .await; match best_archive_server { From 91cab8ffe2f258c2362ba6b9e1de78006efbbf42 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 21:09:39 -0800 Subject: [PATCH 41/47] comment --- web3_proxy/src/rpcs/request.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index 11383b08..2f2cf7b3 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -377,6 +377,7 @@ impl OpenRequestHandle { } else { let latency_ms = start.elapsed().as_secs_f64() * 1000.0; + // TODO: is this lock here a problem? should this be done through a channel? i started to code it, but it didn't seem to matter let mut latency_recording = self.rpc.request_latency.write(); latency_recording.record(latency_ms); From b7dfec9c63e4eaad3d6f5e1ebe4488af74f6d747 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 21:13:24 -0800 Subject: [PATCH 42/47] fix params in health check query --- web3_proxy/src/rpcs/one.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index b4d6cebb..8ee53e9a 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -712,7 +712,7 @@ impl Web3Rpc { if let Ok(Some(x)) = x .request::<_, Option>( "eth_getTransactionByHash", - &txid, + &(txid,), revert_handler, None, ) From 02bada300658475cde48cb66a2ca8f237293d86b Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 21:16:33 -0800 Subject: [PATCH 43/47] properly encode number responses as hex --- web3_proxy/src/app/mod.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/web3_proxy/src/app/mod.rs b/web3_proxy/src/app/mod.rs index 303ea187..351dabf4 100644 --- a/web3_proxy/src/app/mod.rs +++ b/web3_proxy/src/app/mod.rs @@ -1175,9 +1175,7 @@ impl Web3ProxyApp { } } } - "eth_chainId" => { - json!(U64::from(self.config.chain_id)) - } + "eth_chainId" => serde_json::Value::Number(U64::from(self.config.chain_id)), // TODO: eth_callBundle (https://docs.flashbots.net/flashbots-auction/searchers/advanced/rpc-endpoint#eth_callbundle) // TODO: eth_cancelPrivateTransaction (https://docs.flashbots.net/flashbots-auction/searchers/advanced/rpc-endpoint#eth_cancelprivatetransaction, but maybe just reject) // TODO: eth_sendPrivateTransaction (https://docs.flashbots.net/flashbots-auction/searchers/advanced/rpc-endpoint#eth_sendprivatetransaction) @@ -1227,11 +1225,11 @@ impl Web3ProxyApp { // TODO: eth_gasPrice that does awesome magic to predict the future "eth_hashrate" => { // no stats on this. 
its cheap - json!(U64::zero()) + serde_json::Value::Number(U64::zero()) } "eth_mining" => { // no stats on this. its cheap - json!(false) + serde_json::Value::Bool(false) } // TODO: eth_sendBundle (flashbots command) // broadcast transactions to all private rpcs at once @@ -1363,7 +1361,7 @@ impl Web3ProxyApp { "eth_syncing" => { // no stats on this. its cheap // TODO: return a real response if all backends are syncing or if no servers in sync - json!(false) + serde_json::Value::Bool(false) } "eth_subscribe" => { return Ok(( @@ -1388,12 +1386,12 @@ impl Web3ProxyApp { "net_listening" => { // no stats on this. its cheap // TODO: only if there are some backends on balanced_rpcs? - json!(true) + serde_json::Value::Bool(true) } "net_peerCount" => { // no stats on this. its cheap // TODO: do something with proxy_mode here? - self.balanced_rpcs.num_synced_rpcs().into() + serde_json::Value::Number(U64::from(self.balanced_rpcs.num_synced_rpcs())) } "web3_clientVersion" => { // no stats on this. its cheap From f2cfe2956abc9610f28fa95142d93c7965eda342 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 21:19:24 -0800 Subject: [PATCH 44/47] the macro is easier --- web3_proxy/src/app/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/web3_proxy/src/app/mod.rs b/web3_proxy/src/app/mod.rs index 351dabf4..06531264 100644 --- a/web3_proxy/src/app/mod.rs +++ b/web3_proxy/src/app/mod.rs @@ -1175,7 +1175,7 @@ impl Web3ProxyApp { } } } - "eth_chainId" => serde_json::Value::Number(U64::from(self.config.chain_id)), + "eth_chainId" => json!(U64::from(self.config.chain_id)), // TODO: eth_callBundle (https://docs.flashbots.net/flashbots-auction/searchers/advanced/rpc-endpoint#eth_callbundle) // TODO: eth_cancelPrivateTransaction (https://docs.flashbots.net/flashbots-auction/searchers/advanced/rpc-endpoint#eth_cancelprivatetransaction, but maybe just reject) // TODO: eth_sendPrivateTransaction (https://docs.flashbots.net/flashbots-auction/searchers/advanced/rpc-endpoint#eth_sendprivatetransaction) @@ -1225,7 +1225,7 @@ impl Web3ProxyApp { // TODO: eth_gasPrice that does awesome magic to predict the future "eth_hashrate" => { // no stats on this. its cheap - serde_json::Value::Number(U64::zero()) + json!(U64::zero()) } "eth_mining" => { // no stats on this. its cheap @@ -1391,7 +1391,7 @@ impl Web3ProxyApp { "net_peerCount" => { // no stats on this. its cheap // TODO: do something with proxy_mode here? - serde_json::Value::Number(U64::from(self.balanced_rpcs.num_synced_rpcs())) + json!(U64::from(self.balanced_rpcs.num_synced_rpcs())) } "web3_clientVersion" => { // no stats on this. 
its cheap From cb2b5c16ad692f140a021f3ac005b493a5a0fede Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 21:30:42 -0800 Subject: [PATCH 45/47] lower log level --- web3_proxy/src/rpcs/one.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index 8ee53e9a..8e49a92c 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -883,7 +883,11 @@ impl Web3Rpc { broadcast::error::RecvError::Lagged(lagged) => { // querying the block was delayed // this can happen if tokio is very busy or waiting for requests limits took too long - warn!("http interval on {} lagging by {}!", self, lagged); + if self.backup { + debug!("http interval on {} lagging by {}!", self, lagged); + } else { + warn!("http interval on {} lagging by {}!", self, lagged); + } } } } From 4916188d5c7851fde765b07a009129419f64da03 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 15 Feb 2023 21:54:07 -0800 Subject: [PATCH 46/47] theres a perf regression in here somewhere --- web3_proxy/src/rpcs/one.rs | 9 ++++++++- web3_proxy/src/rpcs/request.rs | 7 ++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index 8e49a92c..dcbfe220 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -671,8 +671,14 @@ impl Web3Rpc { // TODO: how often? different depending on the chain? // TODO: reset this timeout when a new block is seen? we need to keep request_latency updated though - let health_sleep_seconds = 10; + // let health_sleep_seconds = 10; + futures::future::pending::<()>().await; + + Ok(()) + + // TODO: benchmark this and lock contention + /* let mut old_total_requests = 0; let mut new_total_requests; @@ -729,6 +735,7 @@ impl Web3Rpc { old_total_requests = new_total_requests; } } + */ }; futures.push(flatten_handle(tokio::spawn(f))); diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index 2f2cf7b3..8a9254a6 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -375,12 +375,13 @@ impl OpenRequestHandle { } } } else { - let latency_ms = start.elapsed().as_secs_f64() * 1000.0; + // TODO: record request latency + // let latency_ms = start.elapsed().as_secs_f64() * 1000.0; // TODO: is this lock here a problem? should this be done through a channel? 
i started to code it, but it didn't seem to matter - let mut latency_recording = self.rpc.request_latency.write(); + // let mut latency_recording = self.rpc.request_latency.write(); - latency_recording.record(latency_ms); + // latency_recording.record(latency_ms); } response From 738815244d8979c7f37973b2a6d1ab259a898218 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Thu, 16 Feb 2023 00:26:58 -0800 Subject: [PATCH 47/47] change ewma calculation --- web3_proxy/src/atomics.rs | 22 ++ web3_proxy/src/lib.rs | 1 + web3_proxy/src/peak_ewma.rs | 397 +++++++++++++++++++++++++++++++ web3_proxy/src/rpcs/consensus.rs | 9 +- web3_proxy/src/rpcs/many.rs | 15 +- web3_proxy/src/rpcs/one.rs | 179 +++++++++----- web3_proxy/src/rpcs/request.rs | 37 +-- 7 files changed, 578 insertions(+), 82 deletions(-) create mode 100644 web3_proxy/src/atomics.rs create mode 100644 web3_proxy/src/peak_ewma.rs diff --git a/web3_proxy/src/atomics.rs b/web3_proxy/src/atomics.rs new file mode 100644 index 00000000..8b0e8e5e --- /dev/null +++ b/web3_proxy/src/atomics.rs @@ -0,0 +1,22 @@ +use std::sync::atomic::{AtomicU64, Ordering}; + +pub struct AtomicF64 { + storage: AtomicU64, +} + +impl AtomicF64 { + pub fn new(value: f64) -> Self { + let as_u64 = value.to_bits(); + Self { + storage: AtomicU64::new(as_u64), + } + } + pub fn store(&self, value: f64, ordering: Ordering) { + let as_u64 = value.to_bits(); + self.storage.store(as_u64, ordering) + } + pub fn load(&self, ordering: Ordering) -> f64 { + let as_u64 = self.storage.load(ordering); + f64::from_bits(as_u64) + } +} diff --git a/web3_proxy/src/lib.rs b/web3_proxy/src/lib.rs index dcf6a8c1..cfcd4ba9 100644 --- a/web3_proxy/src/lib.rs +++ b/web3_proxy/src/lib.rs @@ -1,5 +1,6 @@ pub mod app; pub mod app_stats; +pub mod atomics; pub mod block_number; pub mod config; pub mod frontend; diff --git a/web3_proxy/src/peak_ewma.rs b/web3_proxy/src/peak_ewma.rs new file mode 100644 index 00000000..9adb34d9 --- /dev/null +++ b/web3_proxy/src/peak_ewma.rs @@ -0,0 +1,397 @@ +//! Code from [tower](https://github.com/tower-rs/tower/blob/3f31ffd2cf15f1e905142e5f43ab39ac995c22ed/tower/src/load/peak_ewma.rs) +//! Measures load using the PeakEWMA response latency. +//! TODO: refactor to work with our code + +use std::task::{Context, Poll}; +use std::{ + sync::{Arc, Mutex}, + time::Duration, +}; +use tokio::time::Instant; +use tower_service::Service; +use tracing::trace; + +/// Measures the load of the underlying service using Peak-EWMA load measurement. +/// +/// [`PeakEwma`] implements [`Load`] with the [`Cost`] metric that estimates the amount of +/// pending work to an endpoint. Work is calculated by multiplying the +/// exponentially-weighted moving average (EWMA) of response latencies by the number of +/// pending requests. The Peak-EWMA algorithm is designed to be especially sensitive to +/// worst-case latencies. Over time, the peak latency value decays towards the moving +/// average of latencies to the endpoint. +/// +/// When no latency information has been measured for an endpoint, an arbitrary default +/// RTT of 1 second is used to prevent the endpoint from being overloaded before a +/// meaningful baseline can be established.. +/// +/// ## Note +/// +/// This is derived from [Finagle][finagle], which is distributed under the Apache V2 +/// license. Copyright 2017, Twitter Inc. 
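The load formula that comment describes is easy to sanity-check by hand: the cost is the latency estimate multiplied by the number of in-flight requests plus one. A toy calculation with invented endpoint names and numbers:

fn main() {
    // (name, ewma latency in ms, requests currently in flight)
    let candidates = [("fast_but_busy", 80.0_f64, 6_u32), ("slow_but_idle", 120.0, 1)];

    for (name, ewma_ms, in_flight) in candidates {
        let cost = ewma_ms * f64::from(in_flight + 1);
        println!("{}: cost = {}", name, cost);
    }
    // 80 * 7 = 560 vs 120 * 2 = 240, so the idle endpoint is preferred
    // despite its higher latency
}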
+/// +/// [finagle]: +/// https://github.com/twitter/finagle/blob/9cc08d15216497bb03a1cafda96b7266cfbbcff1/finagle-core/src/main/scala/com/twitter/finagle/loadbalancer/PeakEwma.scala +#[derive(Debug)] +pub struct PeakEwma { + service: S, + decay_ns: f64, + rtt_estimate: Arc>, + completion: C, +} + +#[cfg(feature = "discover")] +pin_project! { + /// Wraps a `D`-typed stream of discovered services with `PeakEwma`. + #[cfg_attr(docsrs, doc(cfg(feature = "discover")))] + #[derive(Debug)] + pub struct PeakEwmaDiscover { + #[pin] + discover: D, + decay_ns: f64, + default_rtt: Duration, + completion: C, + } +} + +/// Represents the relative cost of communicating with a service. +/// +/// The underlying value estimates the amount of pending work to a service: the Peak-EWMA +/// latency estimate multiplied by the number of pending requests. +#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)] +pub struct Cost(f64); + +/// Tracks an in-flight request and updates the RTT-estimate on Drop. +#[derive(Debug)] +pub struct Handle { + sent_at: Instant, + decay_ns: f64, + rtt_estimate: Arc>, +} + +/// Holds the current RTT estimate and the last time this value was updated. +#[derive(Debug)] +struct RttEstimate { + update_at: Instant, + rtt_ns: f64, +} + +const NANOS_PER_MILLI: f64 = 1_000_000.0; + +// ===== impl PeakEwma ===== + +impl PeakEwma { + /// Wraps an `S`-typed service so that its load is tracked by the EWMA of its peak latency. + pub fn new(service: S, default_rtt: Duration, decay_ns: f64, completion: C) -> Self { + debug_assert!(decay_ns > 0.0, "decay_ns must be positive"); + Self { + service, + decay_ns, + rtt_estimate: Arc::new(Mutex::new(RttEstimate::new(nanos(default_rtt)))), + completion, + } + } + + fn handle(&self) -> Handle { + Handle { + decay_ns: self.decay_ns, + sent_at: Instant::now(), + rtt_estimate: self.rtt_estimate.clone(), + } + } +} + +impl Service for PeakEwma +where + S: Service, + C: TrackCompletion, +{ + type Response = C::Output; + type Error = S::Error; + type Future = TrackCompletionFuture; + + fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll> { + self.service.poll_ready(cx) + } + + fn call(&mut self, req: Request) -> Self::Future { + TrackCompletionFuture::new( + self.completion.clone(), + self.handle(), + self.service.call(req), + ) + } +} + +impl Load for PeakEwma { + type Metric = Cost; + + fn load(&self) -> Self::Metric { + let pending = Arc::strong_count(&self.rtt_estimate) as u32 - 1; + + // Update the RTT estimate to account for decay since the last update. + // If an estimate has not been established, a default is provided + let estimate = self.update_estimate(); + + let cost = Cost(estimate * f64::from(pending + 1)); + trace!( + "load estimate={:.0}ms pending={} cost={:?}", + estimate / NANOS_PER_MILLI, + pending, + cost, + ); + cost + } +} + +impl PeakEwma { + fn update_estimate(&self) -> f64 { + let mut rtt = self.rtt_estimate.lock().expect("peak ewma prior_estimate"); + rtt.decay(self.decay_ns) + } +} + +// ===== impl PeakEwmaDiscover ===== + +#[cfg(feature = "discover")] +impl PeakEwmaDiscover { + /// Wraps a `D`-typed [`Discover`] so that services have a [`PeakEwma`] load metric. + /// + /// The provided `default_rtt` is used as the default RTT estimate for newly + /// added services. + /// + /// They `decay` value determines over what time period a RTT estimate should + /// decay. 
+ pub fn new(discover: D, default_rtt: Duration, decay: Duration, completion: C) -> Self + where + D: Discover, + D::Service: Service, + C: TrackCompletion>::Response>, + { + PeakEwmaDiscover { + discover, + decay_ns: nanos(decay), + default_rtt, + completion, + } + } +} + +#[cfg(feature = "discover")] +impl Stream for PeakEwmaDiscover +where + D: Discover, + C: Clone, +{ + type Item = Result>, D::Error>; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.project(); + let change = match ready!(this.discover.poll_discover(cx)).transpose()? { + None => return Poll::Ready(None), + Some(Change::Remove(k)) => Change::Remove(k), + Some(Change::Insert(k, svc)) => { + let peak_ewma = PeakEwma::new( + svc, + *this.default_rtt, + *this.decay_ns, + this.completion.clone(), + ); + Change::Insert(k, peak_ewma) + } + }; + + Poll::Ready(Some(Ok(change))) + } +} + +// ===== impl RttEstimate ===== + +impl RttEstimate { + fn new(rtt_ns: f64) -> Self { + debug_assert!(0.0 < rtt_ns, "rtt must be positive"); + Self { + rtt_ns, + update_at: Instant::now(), + } + } + + /// Decays the RTT estimate with a decay period of `decay_ns`. + fn decay(&mut self, decay_ns: f64) -> f64 { + // Updates with a 0 duration so that the estimate decays towards 0. + let now = Instant::now(); + self.update(now, now, decay_ns) + } + + /// Updates the Peak-EWMA RTT estimate. + /// + /// The elapsed time from `sent_at` to `recv_at` is added + fn update(&mut self, sent_at: Instant, recv_at: Instant, decay_ns: f64) -> f64 { + debug_assert!( + sent_at <= recv_at, + "recv_at={:?} after sent_at={:?}", + recv_at, + sent_at + ); + let rtt = nanos(recv_at.saturating_duration_since(sent_at)); + + let now = Instant::now(); + debug_assert!( + self.update_at <= now, + "update_at={:?} in the future", + self.update_at + ); + + self.rtt_ns = if self.rtt_ns < rtt { + // For Peak-EWMA, always use the worst-case (peak) value as the estimate for + // subsequent requests. + trace!( + "update peak rtt={}ms prior={}ms", + rtt / NANOS_PER_MILLI, + self.rtt_ns / NANOS_PER_MILLI, + ); + rtt + } else { + // When an RTT is observed that is less than the estimated RTT, we decay the + // prior estimate according to how much time has elapsed since the last + // update. The inverse of the decay is used to scale the estimate towards the + // observed RTT value. + let elapsed = nanos(now.saturating_duration_since(self.update_at)); + let decay = (-elapsed / decay_ns).exp(); + let recency = 1.0 - decay; + let next_estimate = (self.rtt_ns * decay) + (rtt * recency); + trace!( + "update rtt={:03.0}ms decay={:06.0}ns; next={:03.0}ms", + rtt / NANOS_PER_MILLI, + self.rtt_ns - next_estimate, + next_estimate / NANOS_PER_MILLI, + ); + next_estimate + }; + self.update_at = now; + + self.rtt_ns + } +} + +// ===== impl Handle ===== + +impl Drop for Handle { + fn drop(&mut self) { + let recv_at = Instant::now(); + + if let Ok(mut rtt) = self.rtt_estimate.lock() { + rtt.update(self.sent_at, recv_at, self.decay_ns); + } + } +} + +// ===== impl Cost ===== + +// Utility that converts durations to nanos in f64. +// +// Due to a lossy transformation, the maximum value that can be represented is ~585 years, +// which, I hope, is more than enough to represent request latencies. 
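Before the nanos helper below, one more bit of intuition for the decay math in `RttEstimate::update` above: a sample above the current estimate becomes the new peak directly, while a sample below it is blended in, and the longer it has been since the last update, the more weight the new sample gets. A small numeric illustration with arbitrary values:

fn main() {
    let decay_ns = 1_000.0 * 1_000_000.0; // a 1 second decay window, in nanoseconds
    let prior_ms = 300.0;
    let observed_ms = 100.0;

    for elapsed_ms in [10.0_f64, 100.0, 1_000.0] {
        let decay = (-(elapsed_ms * 1_000_000.0) / decay_ns).exp();
        let next = prior_ms * decay + observed_ms * (1.0 - decay);
        println!("elapsed = {} ms -> next estimate ~ {:.0} ms", elapsed_ms, next);
    }
}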
+fn nanos(d: Duration) -> f64 { + const NANOS_PER_SEC: u64 = 1_000_000_000; + let n = f64::from(d.subsec_nanos()); + let s = d.as_secs().saturating_mul(NANOS_PER_SEC) as f64; + n + s +} + +#[cfg(test)] +mod tests { + use futures_util::future; + use std::time::Duration; + use tokio::time; + use tokio_test::{assert_ready, assert_ready_ok, task}; + + use super::*; + + struct Svc; + impl Service<()> for Svc { + type Response = (); + type Error = (); + type Future = future::Ready>; + + fn poll_ready(&mut self, _: &mut Context<'_>) -> Poll> { + Poll::Ready(Ok(())) + } + + fn call(&mut self, (): ()) -> Self::Future { + future::ok(()) + } + } + + /// The default RTT estimate decays, so that new nodes are considered if the + /// default RTT is too high. + #[tokio::test] + async fn default_decay() { + time::pause(); + + let svc = PeakEwma::new( + Svc, + Duration::from_millis(10), + NANOS_PER_MILLI * 1_000.0, + CompleteOnResponse, + ); + let Cost(load) = svc.load(); + assert_eq!(load, 10.0 * NANOS_PER_MILLI); + + time::advance(Duration::from_millis(100)).await; + let Cost(load) = svc.load(); + assert!(9.0 * NANOS_PER_MILLI < load && load < 10.0 * NANOS_PER_MILLI); + + time::advance(Duration::from_millis(100)).await; + let Cost(load) = svc.load(); + assert!(8.0 * NANOS_PER_MILLI < load && load < 9.0 * NANOS_PER_MILLI); + } + + // The default RTT estimate decays, so that new nodes are considered if the default RTT is too + // high. + #[tokio::test] + async fn compound_decay() { + time::pause(); + + let mut svc = PeakEwma::new( + Svc, + Duration::from_millis(20), + NANOS_PER_MILLI * 1_000.0, + CompleteOnResponse, + ); + assert_eq!(svc.load(), Cost(20.0 * NANOS_PER_MILLI)); + + time::advance(Duration::from_millis(100)).await; + let mut rsp0 = task::spawn(svc.call(())); + assert!(svc.load() > Cost(20.0 * NANOS_PER_MILLI)); + + time::advance(Duration::from_millis(100)).await; + let mut rsp1 = task::spawn(svc.call(())); + assert!(svc.load() > Cost(40.0 * NANOS_PER_MILLI)); + + time::advance(Duration::from_millis(100)).await; + let () = assert_ready_ok!(rsp0.poll()); + assert_eq!(svc.load(), Cost(400_000_000.0)); + + time::advance(Duration::from_millis(100)).await; + let () = assert_ready_ok!(rsp1.poll()); + assert_eq!(svc.load(), Cost(200_000_000.0)); + + // Check that values decay as time elapses + time::advance(Duration::from_secs(1)).await; + assert!(svc.load() < Cost(100_000_000.0)); + + time::advance(Duration::from_secs(10)).await; + assert!(svc.load() < Cost(100_000.0)); + } + + #[test] + fn nanos() { + assert_eq!(super::nanos(Duration::new(0, 0)), 0.0); + assert_eq!(super::nanos(Duration::new(0, 123)), 123.0); + assert_eq!(super::nanos(Duration::new(1, 23)), 1_000_000_023.0); + assert_eq!( + super::nanos(Duration::new(::std::u64::MAX, 999_999_999)), + 18446744074709553000.0 + ); + } +} diff --git a/web3_proxy/src/rpcs/consensus.rs b/web3_proxy/src/rpcs/consensus.rs index 62901b59..a348b9d6 100644 --- a/web3_proxy/src/rpcs/consensus.rs +++ b/web3_proxy/src/rpcs/consensus.rs @@ -127,10 +127,11 @@ impl ConnectionsGroup { .get_with(*block.hash(), async move { Instant::now() }) .await; - // TODO: this should be 0 if we are first seen, but i think it will be slightly non-zero - rpc.head_latency - .write() - .record(first_seen.elapsed().as_secs_f64() * 1000.0); + // TODO: this should be 0 if we are first seen, but i think it will be slightly non-zero. + // calculate elapsed time before trying to lock. 
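The comment above is about keeping the write lock short: measure the elapsed time first, then lock only to record the already-computed number. A minimal std-only sketch of that pattern (the patch itself records into the `Latency` EWMA behind its own lock rather than a plain `Vec`):

use std::sync::RwLock;
use std::time::Instant;

fn main() {
    let head_latency_ms: RwLock<Vec<f64>> = RwLock::new(Vec::new());

    let first_seen = Instant::now();
    // ... the block propagates, other work happens ...
    let elapsed = first_seen.elapsed(); // measured before taking the lock

    // the lock is held only for the push, not for the clock read
    head_latency_ms.write().unwrap().push(elapsed.as_secs_f64() * 1000.0);

    println!("samples recorded: {}", head_latency_ms.read().unwrap().len());
}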
+ let latency = first_seen.elapsed(); + + rpc.head_latency.write().record(latency); // TODO: what about a reorg to the same height? if Some(block.number()) > self.highest_block.as_ref().map(|x| x.number()) { diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index cc671ab7..d53f0531 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -27,7 +27,7 @@ use serde_json::json; use serde_json::value::RawValue; use std::cmp::min_by_key; use std::collections::BTreeMap; -use std::sync::atomic::Ordering; +use std::sync::atomic::{self, Ordering}; use std::sync::Arc; use std::{cmp, fmt}; use thread_fast_rng::rand::seq::SliceRandom; @@ -592,7 +592,7 @@ impl Web3Rpcs { // TODO: cached key to save a read lock // TODO: ties to the server with the smallest block_data_limit let best_rpc = min_by_key(rpc_a, rpc_b, |x| { - OrderedFloat(x.request_latency.read().ewma.value()) + OrderedFloat(x.head_latency.read().value()) }); trace!("winner: {}", best_rpc); @@ -1159,9 +1159,16 @@ fn rpc_sync_status_sort_key(x: &Arc) -> (U64, u64, OrderedFloat) { let tier = x.tier; - let request_ewma = OrderedFloat(x.request_latency.read().ewma.value()); + // TODO: use request instead of head latency + let head_ewma = x.head_latency.read().value(); - (reversed_head_block, tier, request_ewma) + let active_requests = x.active_requests.load(atomic::Ordering::Relaxed) as f64; + + // TODO: i'm not sure head * active is exactly right. but we'll see + // TODO: i don't think this actually counts as peak. investigate with atomics.rs and peak_ewma.rs + let peak_ewma = OrderedFloat(head_ewma * active_requests); + + (reversed_head_block, tier, peak_ewma) } mod tests { diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index dcbfe220..1fc80ea4 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -8,12 +8,12 @@ use crate::frontend::authorization::Authorization; use crate::rpcs::request::RequestRevertHandler; use anyhow::{anyhow, Context}; use ethers::prelude::{Bytes, Middleware, ProviderError, TxHash, H256, U64}; -use ethers::types::{Transaction, U256}; +use ethers::types::{Address, Transaction, U256}; use futures::future::try_join_all; use futures::StreamExt; -use hdrhistogram::Histogram; use log::{debug, error, info, trace, warn, Level}; use migration::sea_orm::DatabaseConnection; +use ordered_float::OrderedFloat; use parking_lot::RwLock; use redis_rate_limiter::{RedisPool, RedisRateLimitResult, RedisRateLimiter}; use serde::ser::{SerializeStruct, Serializer}; @@ -30,10 +30,8 @@ use tokio::sync::{broadcast, oneshot, watch, RwLock as AsyncRwLock}; use tokio::time::{sleep, sleep_until, timeout, Duration, Instant}; pub struct Latency { - /// Track how many milliseconds slower we are than the fastest node - pub histogram: Histogram, /// exponentially weighted moving average of how many milliseconds behind the fastest node we are - pub ewma: ewma::EWMA, + ewma: ewma::EWMA, } impl Serialize for Latency { @@ -41,51 +39,52 @@ impl Serialize for Latency { where S: Serializer, { - let mut state = serializer.serialize_struct("latency", 6)?; - - state.serialize_field("ewma_ms", &self.ewma.value())?; - - state.serialize_field("histogram_len", &self.histogram.len())?; - state.serialize_field("mean_ms", &self.histogram.mean())?; - state.serialize_field("p50_ms", &self.histogram.value_at_quantile(0.50))?; - state.serialize_field("p75_ms", &self.histogram.value_at_quantile(0.75))?; - state.serialize_field("p99_ms", &self.histogram.value_at_quantile(0.99))?; - - state.end() + 
serializer.serialize_f64(self.ewma.value()) } } impl Latency { - pub fn record(&mut self, milliseconds: f64) { - self.ewma.add(milliseconds); + #[inline(always)] + pub fn record(&mut self, duration: Duration) { + self.record_ms(duration.as_secs_f64() * 1000.0); + } - // histogram needs ints and not floats - self.histogram.record(milliseconds as u64).unwrap(); + #[inline(always)] + pub fn record_ms(&mut self, milliseconds: f64) { + self.ewma.add(milliseconds); + } + + #[inline(always)] + pub fn value(&self) -> f64 { + self.ewma.value() } } impl Default for Latency { fn default() -> Self { - // TODO: what should the default sigfig be? - let sigfig = 0; - // TODO: what should the default span be? 25 requests? have a "new" let span = 25.0; - Self::new(sigfig, span).expect("default histogram sigfigs should always work") + let start = 1000.0; + + Self::new(span, start) } } impl Latency { - pub fn new(sigfig: u8, span: f64) -> Result { + // depending on the span, start might not be perfect + pub fn new(span: f64, start: f64) -> Self { let alpha = Self::span_to_alpha(span); - let histogram = Histogram::new(sigfig)?; + let mut ewma = ewma::EWMA::new(alpha); - Ok(Self { - histogram, - ewma: ewma::EWMA::new(alpha), - }) + if start > 0.0 { + for _ in 0..(span as u64) { + ewma.add(start); + } + } + + Self { ewma } } fn span_to_alpha(span: f64) -> f64 { @@ -127,11 +126,13 @@ pub struct Web3Rpc { pub(super) head_block: RwLock>, /// Track head block latency pub(super) head_latency: RwLock, - /// Track request latency - pub(super) request_latency: RwLock, + // /// Track request latency + // /// TODO: refactor this. this lock kills perf. for now just use head_latency + // pub(super) request_latency: RwLock, /// Track total requests served /// TODO: maybe move this to graphana pub(super) total_requests: AtomicUsize, + pub(super) active_requests: AtomicUsize, } impl Web3Rpc { @@ -259,6 +260,18 @@ impl Web3Rpc { Ok((new_connection, handle)) } + pub async fn peak_ewma(&self) -> OrderedFloat { + // TODO: use request instead of head latency? that was killing perf though + let head_ewma = self.head_latency.read().value(); + + // TODO: what ordering? + let active_requests = self.active_requests.load(atomic::Ordering::Relaxed) as f64; + + // TODO: i'm not sure head * active is exactly right. but we'll see + // TODO: i don't think this actually counts as peak. investigate with atomics.rs and peak_ewma.rs + OrderedFloat(head_ewma * active_requests) + } + // TODO: would be great if rpcs exposed this. see https://github.com/ledgerwatch/erigon/issues/6391 async fn check_block_data_limit( self: &Arc, @@ -671,14 +684,9 @@ impl Web3Rpc { // TODO: how often? different depending on the chain? // TODO: reset this timeout when a new block is seen? 
we need to keep request_latency updated though - // let health_sleep_seconds = 10; - - futures::future::pending::<()>().await; - - Ok(()) + let health_sleep_seconds = 10; // TODO: benchmark this and lock contention - /* let mut old_total_requests = 0; let mut new_total_requests; @@ -696,6 +704,7 @@ impl Web3Rpc { if new_total_requests - old_total_requests < 10 { // TODO: if this fails too many times, reset the connection + // TODO: move this into a function and the chaining should be easier let head_block = conn.head_block.read().clone(); if let Some((block_hash, txid)) = head_block.and_then(|x| { @@ -706,28 +715,65 @@ impl Web3Rpc { Some((block_hash, txid)) }) { - let authorization = authorization.clone(); - let conn = conn.clone(); + let to = conn + .wait_for_query::<_, Option>( + "eth_getTransactionByHash", + &(txid,), + revert_handler, + authorization.clone(), + Some(client.clone()), + ) + .await + .and_then(|tx| { + let tx = tx.context("no transaction found")?; - let x = async move { - conn.try_request_handle(&authorization, Some(client)).await - } - .await; + // TODO: what default? something real? + let to = tx.to.unwrap_or_else(|| { + "0xdead00000000000000000000000000000000beef" + .parse::
() + .expect("deafbeef") + }); - if let Ok(OpenRequestResult::Handle(x)) = x { - if let Ok(Some(x)) = x - .request::<_, Option>( - "eth_getTransactionByHash", - &(txid,), + Ok(to) + }); + + let code = match to { + Err(err) => { + if conn.backup { + debug!( + "{} failed health check query! {:#?}", + conn, err + ); + } else { + warn!( + "{} failed health check query! {:#?}", + conn, err + ); + } + continue; + } + Ok(to) => { + conn.wait_for_query::<_, Option>( + "eth_getCode", + &(to, block_hash), revert_handler, - None, + authorization.clone(), + Some(client), ) .await - { - // TODO: make this flatter - // TODO: do more (fair, not random) things here - // let = x.request("eth_getCode", (tx.to.unwrap_or(Address::zero()), block_hash), RequestRevertHandler::ErrorLevel, Some(client.clone())) } + }; + + if let Err(err) = code { + if conn.backup { + debug!( + "{} failed health check query! {:#?}", + conn, err + ); + } else { + warn!("{} failed health check query! {:#?}", conn, err); + } + continue; } } } @@ -735,7 +781,6 @@ impl Web3Rpc { old_total_requests = new_total_requests; } } - */ }; futures.push(flatten_handle(tokio::spawn(f))); @@ -1144,6 +1189,26 @@ impl Web3Rpc { Ok(OpenRequestResult::Handle(handle)) } + + pub async fn wait_for_query( + self: &Arc, + method: &str, + params: &P, + revert_handler: RequestRevertHandler, + authorization: Arc, + unlocked_provider: Option>, + ) -> anyhow::Result + where + // TODO: not sure about this type. would be better to not need clones, but measure and spawns combine to need it + P: Clone + fmt::Debug + serde::Serialize + Send + Sync + 'static, + R: serde::Serialize + serde::de::DeserializeOwned + fmt::Debug, + { + self.wait_for_request_handle(&authorization, None, None) + .await? + .request::(method, params, revert_handler, unlocked_provider) + .await + .context("ProviderError from the backend") + } } impl fmt::Debug for Web3Provider { @@ -1211,9 +1276,7 @@ impl Serialize for Web3Rpc { // TODO: maybe this is too much data. serialize less? state.serialize_field("head_block", &*self.head_block.read())?; - state.serialize_field("head_latency", &*self.head_latency.read())?; - - state.serialize_field("request_latency", &*self.request_latency.read())?; + state.serialize_field("head_latency", &self.head_latency.read().value())?; state.serialize_field( "total_requests", diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index 8a9254a6..139e3bba 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -139,6 +139,7 @@ impl OpenRequestHandle { /// Send a web3 request /// By having the request method here, we ensure that the rate limiter was called and connection counts were properly incremented + /// depending on how things are locked, you might need to pass the provider in pub async fn request( self, method: &str, @@ -156,30 +157,23 @@ impl OpenRequestHandle { // trace!(rpc=%self.conn, %method, "request"); trace!("requesting from {}", self.rpc); - let mut provider: Option> = None; + let mut provider = if unlocked_provider.is_some() { + unlocked_provider + } else { + self.rpc.provider.read().await.clone() + }; + let mut logged = false; while provider.is_none() { // trace!("waiting on provider: locking..."); - - // TODO: this should *not* be new_head_client. 
that is dedicated to only new heads - if let Some(unlocked_provider) = unlocked_provider { - provider = Some(unlocked_provider); - break; - } - - let unlocked_provider = self.rpc.provider.read().await; - - if let Some(unlocked_provider) = unlocked_provider.clone() { - provider = Some(unlocked_provider); - break; - } + sleep(Duration::from_millis(100)).await; if !logged { debug!("no provider for open handle on {}", self.rpc); logged = true; } - sleep(Duration::from_millis(100)).await; + provider = self.rpc.provider.read().await.clone(); } let provider = provider.expect("provider was checked already"); @@ -188,7 +182,11 @@ impl OpenRequestHandle { .total_requests .fetch_add(1, std::sync::atomic::Ordering::Relaxed); - let start = Instant::now(); + self.rpc + .active_requests + .fetch_add(1, std::sync::atomic::Ordering::Relaxed); + + // let latency = Instant::now(); // TODO: replace ethers-rs providers with our own that supports streaming the responses let response = match provider.as_ref() { @@ -201,6 +199,13 @@ impl OpenRequestHandle { } }; + // note. we intentionally do not record this latency now. we do NOT want to measure errors + // let latency = latency.elapsed(); + + self.rpc + .active_requests + .fetch_sub(1, std::sync::atomic::Ordering::Relaxed); + // // TODO: i think ethers already has trace logging (and does it much more fancy) // trace!( // "response from {} for {} {:?}: {:?}",