From 8a097dabbe2f97a39df485731b99ad208dbb8f40 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Fri, 12 May 2023 15:15:32 -0700 Subject: [PATCH] Bryan devel 2023-05-12 (#67) * add minor todo * BadRequest instead of web3_context * more bad request error codes * use tokio-uring for the tcp listener * clear block instead of panic * clone earlier * more watch channels instead of rwlocks * drop uring for now (its single threaded) and combine get/post/put routes * clean up iter vs into_iter and unnecessary collect * arcswap instead of rwlock for Web3Rpcs.by_name * cargo upgrade * uuid fast-rng and alphabetize * if protected rpcs, only use protected rpcs * listenfd * make connectinfo optional * try_get_with_by_ref instead of try_get_with * anyhow ensure. and try_get_with_as_ref isn't actually needed * fix feature flags * more refs and less clone * automatic retry for eth_getTransactionReceipt and eth_getTransactionByHash thanks for the report Lefteris @ Rotki * ArcSwap for provider * set archive_request to true on transaction retrying * merge durable stats * Revert "ArcSwap for provider" This reverts commit 166d77f204cde9fa7722c0cefecbb27008749d47. * comments * less clones * more refs * fix test * add optional mimalloc feature * remove stale dependency * sort * cargo upgrade * lint constants * add todo * another todo * lint * anyhow::ensure instead of panic * allow rpc_accounting_v2 entries for requests without an rpc key --- Cargo.lock | 52 +- Dockerfile | 6 +- README.md | 4 +- deferred-rate-limiter/src/lib.rs | 4 +- entities/Cargo.toml | 2 +- entities/src/rpc_accounting_v2.rs | 2 +- latency/src/util/atomic_f32_pair.rs | 7 +- migration/Cargo.toml | 2 +- migration/src/lib.rs | 2 + migration/src/m20221031_211916_clean_up.rs | 1 - migration/src/m20230117_191358_admin_table.rs | 2 - .../src/m20230205_130035_create_balance.rs | 2 - ...14_134254_increase_balance_transactions.rs | 1 - migration/src/m20230215_152254_admin_trail.rs | 1 - migration/src/m20230221_230953_track_spend.rs | 1 - ...20230422_172555_premium_downgrade_logic.rs | 5 - ...emove_columns_statsv2_origin_and_method.rs | 1 - ...20213_allow_null_rpc_key_id_in_stats_v2.rs | 48 ++ web3_proxy/Cargo.toml | 14 +- web3_proxy/src/app/mod.rs | 368 +++++------ web3_proxy/src/app/ws.rs | 168 ++--- web3_proxy/src/bin/wait_for_sync.rs | 26 +- web3_proxy/src/bin/web3_proxy_cli/main.rs | 17 +- .../bin/web3_proxy_cli/migrate_stats_to_v2.rs | 131 ++-- web3_proxy/src/bin/web3_proxy_cli/proxyd.rs | 21 +- .../src/bin/web3_proxy_cli/rpc_accounting.rs | 6 +- .../src/bin/web3_proxy_cli/sentryd/compare.rs | 13 +- .../src/bin/web3_proxy_cli/sentryd/mod.rs | 2 +- .../src/bin/web3_proxy_cli/user_import.rs | 16 +- web3_proxy/src/frontend/admin.rs | 4 +- web3_proxy/src/frontend/authorization.rs | 466 +++++++++++++- web3_proxy/src/frontend/errors.rs | 21 + web3_proxy/src/frontend/mod.rs | 136 ++--- web3_proxy/src/frontend/rpc_proxy_http.rs | 14 +- web3_proxy/src/frontend/rpc_proxy_ws.rs | 189 +++--- web3_proxy/src/frontend/status.rs | 8 +- .../src/frontend/users/authentication.rs | 4 +- web3_proxy/src/frontend/users/payment.rs | 5 +- web3_proxy/src/frontend/users/referral.rs | 3 +- web3_proxy/src/frontend/users/subuser.rs | 14 +- web3_proxy/src/jsonrpc.rs | 7 +- web3_proxy/src/rpcs/blockchain.rs | 11 +- web3_proxy/src/rpcs/consensus.rs | 168 ++++- web3_proxy/src/rpcs/many.rs | 575 +++++++++--------- web3_proxy/src/rpcs/one.rs | 101 +-- web3_proxy/src/stats/db_queries.rs | 1 + web3_proxy/src/stats/influxdb_queries.rs | 2 +- web3_proxy/src/stats/mod.rs | 461 
++++---------- web3_proxy/src/stats/stat_buffer.rs | 269 ++++++++ 49 files changed, 2001 insertions(+), 1383 deletions(-) create mode 100644 migration/src/m20230512_220213_allow_null_rpc_key_id_in_stats_v2.rs create mode 100644 web3_proxy/src/stats/stat_buffer.rs diff --git a/Cargo.lock b/Cargo.lock index 3fe8f6f0..5832affc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,6 +114,12 @@ dependencies = [ "backtrace", ] +[[package]] +name = "arc-swap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" + [[package]] name = "argh" version = "0.1.10" @@ -1576,7 +1582,7 @@ dependencies = [ [[package]] name = "entities" -version = "0.17.0" +version = "0.27.0" dependencies = [ "ethers", "sea-orm", @@ -3041,6 +3047,16 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" +[[package]] +name = "libmimalloc-sys" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4ac0e912c8ef1b735e92369695618dc5b1819f5a7bf3f167301a3ba1cea515e" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "libz-sys" version = "1.1.9" @@ -3068,6 +3084,17 @@ version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f" +[[package]] +name = "listenfd" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0500463acd96259d219abb05dc57e5a076ef04b2db9a2112846929b5f174c96" +dependencies = [ + "libc", + "uuid 1.3.2", + "winapi", +] + [[package]] name = "lock_api" version = "0.4.9" @@ -3143,12 +3170,21 @@ dependencies = [ [[package]] name = "migration" -version = "0.19.0" +version = "0.27.0" dependencies = [ "sea-orm-migration", "tokio", ] +[[package]] +name = "mimalloc" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e2894987a3459f3ffb755608bd82188f8ed00d0ae077f1edea29c068d639d98" +dependencies = [ + "libmimalloc-sys", +] + [[package]] name = "mime" version = "0.3.17" @@ -4607,12 +4643,6 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hex" version = "2.1.0" @@ -6609,9 +6639,10 @@ dependencies = [ [[package]] name = "web3_proxy" -version = "0.17.0" +version = "0.27.0" dependencies = [ "anyhow", + "arc-swap", "argh", "axum", "axum-client-ip", @@ -6643,8 +6674,10 @@ dependencies = [ "ipnet", "itertools", "latency", + "listenfd", "log", "migration", + "mimalloc", "moka", "num", "num-traits", @@ -6659,7 +6692,6 @@ dependencies = [ "regex", "reqwest", "rmp-serde", - "rustc-hash", "sentry", "serde", "serde_json", diff --git a/Dockerfile b/Dockerfile index d9717123..5e6c3019 100644 --- a/Dockerfile +++ b/Dockerfile @@ -32,17 +32,19 @@ RUN apt-get update && \ # copy the application COPY . . 
+ENV WEB3_PROXY_FEATURES "rdkafka-src" + # test the application with cargo-nextest RUN --mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/app/target \ - cargo nextest run --features "rdkafka-src tokio-uring" --no-default-features + cargo nextest run --features "$WEB3_PROXY_FEATURES" --no-default-features # build the application # using a "release" profile (which install does) is **very** important RUN --mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/app/target \ cargo install \ - --features "rdkafka-src tokio-uring" \ + --features "$WEB3_PROXY_FEATURES" \ --locked \ --no-default-features \ --path ./web3_proxy \ diff --git a/README.md b/README.md index 8f70d866..af5dabf1 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,9 @@ web3_proxy_cli health_compass https://eth.llamarpc.com https://eth-ski.llamarpc. ### Run migrations -This is only really useful during development. The migrations run on application start. +Generally it is simplest to just run the app to run migrations. It runs migrations on start. + +But if you want to run them manually (generally only useful in development): ``` cd migration diff --git a/deferred-rate-limiter/src/lib.rs b/deferred-rate-limiter/src/lib.rs index f14fd4e5..8f055ce3 100644 --- a/deferred-rate-limiter/src/lib.rs +++ b/deferred-rate-limiter/src/lib.rs @@ -89,7 +89,7 @@ where // set arc_deferred_rate_limit_result and return the coun self.local_cache - .get_with(key, async move { + .get_with_by_ref(&key, async move { // we do not use the try operator here because we want to be okay with redis errors let redis_count = match rrl .throttle_label(&redis_key, Some(max_requests_per_period), count) @@ -110,7 +110,7 @@ where count } Ok(RedisRateLimitResult::RetryNever) => { - panic!("RetryNever shouldn't happen") + unreachable!(); } Err(err) => { let _ = deferred_rate_limit_result diff --git a/entities/Cargo.toml b/entities/Cargo.toml index 2e94c960..c88d7668 100644 --- a/entities/Cargo.toml +++ b/entities/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "entities" -version = "0.17.0" +version = "0.27.0" edition = "2021" [lib] diff --git a/entities/src/rpc_accounting_v2.rs b/entities/src/rpc_accounting_v2.rs index 49121125..d3cc8cb6 100644 --- a/entities/src/rpc_accounting_v2.rs +++ b/entities/src/rpc_accounting_v2.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize}; pub struct Model { #[sea_orm(primary_key)] pub id: u64, - pub rpc_key_id: u64, + pub rpc_key_id: Option, pub chain_id: u64, pub period_datetime: DateTimeUtc, pub archive_needed: bool, diff --git a/latency/src/util/atomic_f32_pair.rs b/latency/src/util/atomic_f32_pair.rs index 08ee3953..fa74fa0b 100644 --- a/latency/src/util/atomic_f32_pair.rs +++ b/latency/src/util/atomic_f32_pair.rs @@ -54,26 +54,27 @@ fn from_bits(bits: u64) -> [f32; 2] { #[cfg(test)] mod tests { + use std::f32; use std::sync::atomic::Ordering; use super::{from_bits, to_bits, AtomicF32Pair}; #[test] fn test_f32_pair_bit_conversions() { - let pair = [3.14159, 2.71828]; + let pair = [f32::consts::PI, f32::consts::E]; assert_eq!(pair, from_bits(to_bits(pair))); } #[test] fn test_atomic_f32_pair_load() { - let pair = [3.14159, 2.71828]; + let pair = [f32::consts::PI, f32::consts::E]; let atomic = AtomicF32Pair::new(pair); assert_eq!(pair, atomic.load(Ordering::Relaxed)); } #[test] fn test_atomic_f32_pair_fetch_update() { - let pair = [3.14159, 2.71828]; + let pair = [f32::consts::PI, f32::consts::E]; let atomic = AtomicF32Pair::new(pair); atomic .fetch_update(Ordering::Relaxed, 
Ordering::Relaxed, |[f1, f2]| { diff --git a/migration/Cargo.toml b/migration/Cargo.toml index d8c07ad9..97f07b5e 100644 --- a/migration/Cargo.toml +++ b/migration/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "migration" -version = "0.19.0" +version = "0.27.0" edition = "2021" publish = false diff --git a/migration/src/lib.rs b/migration/src/lib.rs index ddd8160d..182f5a1e 100644 --- a/migration/src/lib.rs +++ b/migration/src/lib.rs @@ -26,6 +26,7 @@ mod m20230221_230953_track_spend; mod m20230412_171916_modify_secondary_user_add_primary_user; mod m20230422_172555_premium_downgrade_logic; mod m20230511_161214_remove_columns_statsv2_origin_and_method; +mod m20230512_220213_allow_null_rpc_key_id_in_stats_v2; pub struct Migrator; @@ -59,6 +60,7 @@ impl MigratorTrait for Migrator { Box::new(m20230412_171916_modify_secondary_user_add_primary_user::Migration), Box::new(m20230422_172555_premium_downgrade_logic::Migration), Box::new(m20230511_161214_remove_columns_statsv2_origin_and_method::Migration), + Box::new(m20230512_220213_allow_null_rpc_key_id_in_stats_v2::Migration), ] } } diff --git a/migration/src/m20221031_211916_clean_up.rs b/migration/src/m20221031_211916_clean_up.rs index 82a1df7a..7cceebf9 100644 --- a/migration/src/m20221031_211916_clean_up.rs +++ b/migration/src/m20221031_211916_clean_up.rs @@ -92,7 +92,6 @@ impl MigrationTrait for Migration { ) .await?; - // rename column rpc_key to rpc_secret_key Ok(()) } diff --git a/migration/src/m20230117_191358_admin_table.rs b/migration/src/m20230117_191358_admin_table.rs index 1d11ebf3..5315a276 100644 --- a/migration/src/m20230117_191358_admin_table.rs +++ b/migration/src/m20230117_191358_admin_table.rs @@ -6,7 +6,6 @@ pub struct Migration; #[async_trait::async_trait] impl MigrationTrait for Migration { async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts manager .create_table( Table::create() @@ -36,7 +35,6 @@ impl MigrationTrait for Migration { } async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts manager .drop_table(Table::drop().table(Admin::Table).to_owned()) .await diff --git a/migration/src/m20230205_130035_create_balance.rs b/migration/src/m20230205_130035_create_balance.rs index 11076fce..0036b151 100644 --- a/migration/src/m20230205_130035_create_balance.rs +++ b/migration/src/m20230205_130035_create_balance.rs @@ -6,7 +6,6 @@ pub struct Migration; #[async_trait::async_trait] impl MigrationTrait for Migration { async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts manager .create_table( Table::create() @@ -48,7 +47,6 @@ impl MigrationTrait for Migration { } async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts manager .drop_table(Table::drop().table(Balance::Table).to_owned()) .await diff --git a/migration/src/m20230214_134254_increase_balance_transactions.rs b/migration/src/m20230214_134254_increase_balance_transactions.rs index 72ea4d60..2de3db9a 100644 --- a/migration/src/m20230214_134254_increase_balance_transactions.rs +++ b/migration/src/m20230214_134254_increase_balance_transactions.rs @@ -68,7 +68,6 @@ impl MigrationTrait for Migration { } async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts manager .drop_table( Table::drop() diff --git 
a/migration/src/m20230215_152254_admin_trail.rs b/migration/src/m20230215_152254_admin_trail.rs index c4c4d21b..0a758796 100644 --- a/migration/src/m20230215_152254_admin_trail.rs +++ b/migration/src/m20230215_152254_admin_trail.rs @@ -49,7 +49,6 @@ impl MigrationTrait for Migration { } async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts manager .drop_table(Table::drop().table(AdminTrail::Table).to_owned()) .await diff --git a/migration/src/m20230221_230953_track_spend.rs b/migration/src/m20230221_230953_track_spend.rs index d6a62d32..30fec499 100644 --- a/migration/src/m20230221_230953_track_spend.rs +++ b/migration/src/m20230221_230953_track_spend.rs @@ -22,7 +22,6 @@ impl MigrationTrait for Migration { } async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts manager .alter_table( sea_query::Table::alter() diff --git a/migration/src/m20230422_172555_premium_downgrade_logic.rs b/migration/src/m20230422_172555_premium_downgrade_logic.rs index e474a785..a4ef2496 100644 --- a/migration/src/m20230422_172555_premium_downgrade_logic.rs +++ b/migration/src/m20230422_172555_premium_downgrade_logic.rs @@ -7,8 +7,6 @@ pub struct Migration; #[async_trait::async_trait] impl MigrationTrait for Migration { async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts - // Add a column "downgrade_tier_id" // It is a "foreign key" that references other items in this table manager @@ -18,7 +16,6 @@ impl MigrationTrait for Migration { .add_column(ColumnDef::new(UserTier::DowngradeTierId).big_unsigned()) .add_foreign_key( TableForeignKey::new() - .to_tbl(UserTier::Table) .to_tbl(UserTier::Table) .from_col(UserTier::DowngradeTierId) .to_col(UserTier::Id), @@ -83,8 +80,6 @@ impl MigrationTrait for Migration { } async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { - // Replace the sample below with your own migration scripts - // Remove the two tiers that you just added // And remove the column you just added let db_conn = manager.get_connection(); diff --git a/migration/src/m20230511_161214_remove_columns_statsv2_origin_and_method.rs b/migration/src/m20230511_161214_remove_columns_statsv2_origin_and_method.rs index 0dc736c1..a5463c69 100644 --- a/migration/src/m20230511_161214_remove_columns_statsv2_origin_and_method.rs +++ b/migration/src/m20230511_161214_remove_columns_statsv2_origin_and_method.rs @@ -44,7 +44,6 @@ impl MigrationTrait for Migration { #[derive(Iden)] enum RpcAccountingV2 { Table, - Id, Origin, Method, } diff --git a/migration/src/m20230512_220213_allow_null_rpc_key_id_in_stats_v2.rs b/migration/src/m20230512_220213_allow_null_rpc_key_id_in_stats_v2.rs new file mode 100644 index 00000000..b9d35ae9 --- /dev/null +++ b/migration/src/m20230512_220213_allow_null_rpc_key_id_in_stats_v2.rs @@ -0,0 +1,48 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .alter_table( + sea_query::Table::alter() + .table(RpcAccountingV2::Table) + .to_owned() + // allow rpc_key_id to be null. 
Needed for public rpc stat tracking + .modify_column( + ColumnDef::new(RpcAccountingV2::RpcKeyId) + .big_unsigned() + .null(), + ) + .to_owned(), + ) + .await + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .alter_table( + sea_query::Table::alter() + .table(RpcAccountingV2::Table) + .to_owned() + .modify_column( + ColumnDef::new(RpcAccountingV2::RpcKeyId) + .big_unsigned() + .not_null() + .default(0), + ) + .to_owned(), + ) + .await + } +} + +/// Learn more at https://docs.rs/sea-query#iden +#[derive(Iden)] +enum RpcAccountingV2 { + Table, + RpcKeyId, +} diff --git a/web3_proxy/Cargo.toml b/web3_proxy/Cargo.toml index e8d65868..4883f9a2 100644 --- a/web3_proxy/Cargo.toml +++ b/web3_proxy/Cargo.toml @@ -1,16 +1,18 @@ [package] name = "web3_proxy" -version = "0.17.0" +version = "0.27.0" edition = "2021" default-run = "web3_proxy_cli" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [features] -default = ["deadlock_detection"] +default = ["connectinfo", "deadlock_detection"] deadlock_detection = ["parking_lot/deadlock_detection"] +mimalloc = ["dep:mimalloc"] tokio-console = ["dep:tokio-console", "dep:console-subscriber"] rdkafka-src = ["rdkafka/cmake-build", "rdkafka/libz", "rdkafka/ssl", "rdkafka/zstd-pkg-config"] +connectinfo = [] [dependencies] deferred-rate-limiter = { path = "../deferred-rate-limiter" } @@ -27,6 +29,7 @@ thread-fast-rng = { path = "../thread-fast-rng" } # TODO: make sure this time version matches siwe. PR to put this in their prelude anyhow = { version = "1.0.71", features = ["backtrace"] } +arc-swap = "1.6.0" argh = "0.1.10" axum = { version = "0.6.18", features = ["headers", "ws"] } axum-client-ip = "0.4.1" @@ -47,15 +50,17 @@ gethostname = "0.4.2" glob = "0.3.1" handlebars = "4.3.7" hashbrown = { version = "0.13.2", features = ["serde"] } -hex_fmt = "0.3.0" hdrhistogram = "7.5.2" +hex_fmt = "0.3.0" +hostname = "0.3.1" http = "0.2.9" influxdb2 = { git = "https://github.com/llamanodes/influxdb2", features = ["rustls"] } influxdb2-structmap = { git = "https://github.com/llamanodes/influxdb2/"} -hostname = "0.3.1" ipnet = "2.7.2" itertools = "0.10.5" +listenfd = "1.0.1" log = "0.4.17" +mimalloc = { version = "0.1.37", optional = true} moka = { version = "0.11.0", default-features = false, features = ["future"] } num = "0.4.0" num-traits = "0.2.15" @@ -69,7 +74,6 @@ rdkafka = { version = "0.29.0" } regex = "1.8.1" reqwest = { version = "0.11.17", default-features = false, features = ["json", "tokio-rustls"] } rmp-serde = "1.1.1" -rustc-hash = "1.1.0" sentry = { version = "0.31.0", default-features = false, features = ["backtrace", "contexts", "panic", "anyhow", "reqwest", "rustls", "log", "sentry-log"] } serde = { version = "1.0.163", features = [] } serde_json = { version = "1.0.96", default-features = false, features = ["alloc", "raw_value"] } diff --git a/web3_proxy/src/app/mod.rs b/web3_proxy/src/app/mod.rs index 46072197..63c3f9f5 100644 --- a/web3_proxy/src/app/mod.rs +++ b/web3_proxy/src/app/mod.rs @@ -3,7 +3,9 @@ mod ws; use crate::block_number::{block_needed, BlockNeeded}; use crate::config::{AppConfig, TopConfig}; -use crate::frontend::authorization::{Authorization, RequestMetadata, RpcSecretKey}; +use crate::frontend::authorization::{ + Authorization, RequestMetadata, RequestOrMethod, RpcSecretKey, +}; use crate::frontend::errors::{Web3ProxyError, Web3ProxyErrorContext, Web3ProxyResult}; use crate::frontend::rpc_proxy_ws::ProxyMode; use crate::jsonrpc::{ @@ -14,7 +16,7 @@ 
use crate::rpcs::consensus::ConsensusWeb3Rpcs; use crate::rpcs::many::Web3Rpcs; use crate::rpcs::one::Web3Rpc; use crate::rpcs::transactions::TxStatus; -use crate::stats::{AppStat, RpcQueryStats, StatBuffer}; +use crate::stats::{AppStat, StatBuffer}; use crate::user_token::UserBearerToken; use anyhow::Context; use axum::headers::{Origin, Referer, UserAgent}; @@ -40,8 +42,6 @@ use migration::sea_orm::{ use migration::sea_query::table::ColumnDef; use migration::{Alias, DbErr, Migrator, MigratorTrait, Table}; use moka::future::Cache; -use rdkafka::message::{Header, OwnedHeaders}; -use rdkafka::producer::FutureRecord; use redis_rate_limiter::redis::AsyncCommands; use redis_rate_limiter::{redis, DeadpoolRuntime, RedisConfig, RedisPool, RedisRateLimiter}; use serde::Serialize; @@ -78,7 +78,6 @@ struct ResponseCacheKey { // to_block is only set when ranges of blocks are requested (like with eth_getLogs) to_block: Option, method: String, - // TODO: better type for this params: Option, cache_errors: bool, } @@ -87,7 +86,7 @@ impl ResponseCacheKey { fn weight(&self) -> usize { let mut w = self.method.len(); - if let Some(p) = self.params.as_ref() { + if let Some(ref p) = self.params { w += p.to_string().len(); } @@ -206,6 +205,10 @@ impl DatabaseReplica { } } +// TODO: this should be a the secret key id, not the key itself! +pub type RpcSecretKeyCache = + Cache; + /// The application // TODO: i'm sure this is more arcs than necessary, but spawning futures makes references hard pub struct Web3ProxyApp { @@ -251,8 +254,7 @@ pub struct Web3ProxyApp { pub vredis_pool: Option, /// cache authenticated users so that we don't have to query the database on the hot path // TODO: should the key be our RpcSecretKey class instead of Ulid? - pub rpc_secret_key_cache: - Cache, + pub rpc_secret_key_cache: RpcSecretKeyCache, /// concurrent/parallel RPC request limits for authenticated users pub registered_user_semaphores: Cache, hashbrown::hash_map::DefaultHashBuilder>, @@ -276,8 +278,7 @@ pub async fn flatten_handle(handle: AnyhowJoinHandle) -> anyhow::Result } } -/// return the first error or okay if everything worked - +/// return the first error, or Ok if everything worked pub async fn flatten_handles( mut handles: FuturesUnordered>, ) -> anyhow::Result<()> { @@ -493,11 +494,10 @@ impl Web3ProxyApp { db_conn.clone().map(DatabaseReplica) }; } else { - if top_config.app.db_replica_url.is_some() { - return Err(anyhow::anyhow!( - "if there is a db_replica_url, there must be a db_url" - )); - } + anyhow::ensure!( + top_config.app.db_replica_url.is_none(), + "if there is a db_replica_url, there must be a db_url" + ); warn!("no database. some features will be disabled"); }; @@ -516,7 +516,10 @@ impl Web3ProxyApp { .set("security.protocol", security_protocol) .create() { - Ok(k) => kafka_producer = Some(k), + Ok(k) => { + // TODO: create our topic + kafka_producer = Some(k) + } Err(err) => error!("Failed connecting to kafka. This will not retry. {:?}", err), } } @@ -596,15 +599,15 @@ impl Web3ProxyApp { let mut stat_sender = None; if let Some(influxdb_bucket) = top_config.app.influxdb_bucket.clone() { if let Some(spawned_stat_buffer) = StatBuffer::try_spawn( - top_config.app.chain_id, + BILLING_PERIOD_SECONDS, influxdb_bucket, + top_config.app.chain_id, db_conn.clone(), + 60, influxdb_client.clone(), Some(rpc_secret_key_cache.clone()), - 60, - 1, - BILLING_PERIOD_SECONDS, stat_buffer_shutdown_receiver, + 1, )? 
{ // since the database entries are used for accounting, we want to be sure everything is saved before exiting important_background_handles.push(spawned_stat_buffer.background_handle); @@ -634,7 +637,7 @@ impl Web3ProxyApp { let mut frontend_registered_user_rate_limiter = None; let mut login_rate_limiter = None; - if let Some(redis_pool) = vredis_pool.as_ref() { + if let Some(ref redis_pool) = vredis_pool { if let Some(public_requests_per_period) = top_config.app.public_requests_per_period { // chain id is included in the app name so that rpc rate limits are per-chain let rpc_rrl = RedisRateLimiter::new( @@ -669,7 +672,6 @@ impl Web3ProxyApp { )); } - // TODO: i don't like doing Block::default here! Change this to "None"? let (watch_consensus_head_sender, watch_consensus_head_receiver) = watch::channel(None); // TODO: will one receiver lagging be okay? how big should this be? let (pending_tx_sender, pending_tx_receiver) = broadcast::channel(256); @@ -894,7 +896,7 @@ impl Web3ProxyApp { .context("updating balanced rpcs")?; if let Some(private_rpc_configs) = new_top_config.private_rpcs { - if let Some(private_rpcs) = self.private_rpcs.as_ref() { + if let Some(ref private_rpcs) = self.private_rpcs { private_rpcs .apply_server_configs(self, private_rpc_configs) .await @@ -906,7 +908,7 @@ impl Web3ProxyApp { } if let Some(bundler_4337_rpc_configs) = new_top_config.bundler_4337_rpcs { - if let Some(bundler_4337_rpcs) = self.bundler_4337_rpcs.as_ref() { + if let Some(ref bundler_4337_rpcs) = self.bundler_4337_rpcs { bundler_4337_rpcs .apply_server_configs(self, bundler_4337_rpc_configs) .await @@ -1106,23 +1108,29 @@ impl Web3ProxyApp { self: &Arc, authorization: Arc, request: JsonRpcRequestEnum, - ) -> Web3ProxyResult<(JsonRpcForwardedResponseEnum, Vec>)> { + ) -> Web3ProxyResult<(StatusCode, JsonRpcForwardedResponseEnum, Vec>)> { // trace!(?request, "proxy_web3_rpc"); // even though we have timeouts on the requests to our backend providers, // we need a timeout for the incoming request so that retries don't run forever - // TODO: take this as an optional argument. per user max? expiration time instead of duration? - let max_time = Duration::from_secs(120); + // TODO: take this as an optional argument. check for a different max from the user_tier? + // TODO: how much time was spent on this request alredy? + let max_time = Duration::from_secs(240); + // TODO: use streams and buffers so we don't overwhelm our server let response = match request { - JsonRpcRequestEnum::Single(request) => { - let (response, rpcs) = timeout( + JsonRpcRequestEnum::Single(mut request) => { + let (status_code, response, rpcs) = timeout( max_time, - self.proxy_cached_request(&authorization, request, None), + self.proxy_cached_request(&authorization, &mut request, None), ) - .await??; + .await?; - (JsonRpcForwardedResponseEnum::Single(response), rpcs) + ( + status_code, + JsonRpcForwardedResponseEnum::Single(response), + rpcs, + ) } JsonRpcRequestEnum::Batch(requests) => { let (responses, rpcs) = timeout( @@ -1131,7 +1139,12 @@ impl Web3ProxyApp { ) .await??; - (JsonRpcForwardedResponseEnum::Batch(responses), rpcs) + // TODO: real status code + ( + StatusCode::OK, + JsonRpcForwardedResponseEnum::Batch(responses), + rpcs, + ) } }; @@ -1143,14 +1156,11 @@ impl Web3ProxyApp { async fn proxy_web3_rpc_requests( self: &Arc, authorization: &Arc, - requests: Vec, + mut requests: Vec, ) -> Web3ProxyResult<(Vec, Vec>)> { // TODO: we should probably change ethers-rs to support this directly. 
they pushed this off to v2 though let num_requests = requests.len(); - // TODO: spawn so the requests go in parallel? need to think about rate limiting more if we do that - // TODO: improve flattening - // get the head block now so that any requests that need it all use the same block // TODO: this still has an edge condition if there is a reorg in the middle of the request!!! let head_block_num = self @@ -1160,7 +1170,7 @@ impl Web3ProxyApp { let responses = join_all( requests - .into_iter() + .iter_mut() .map(|request| { self.proxy_cached_request(authorization, request, Some(head_block_num)) }) @@ -1168,14 +1178,12 @@ impl Web3ProxyApp { ) .await; - // TODO: i'm sure this could be done better with iterators - // TODO: stream the response? let mut collected: Vec = Vec::with_capacity(num_requests); let mut collected_rpc_names: HashSet = HashSet::new(); let mut collected_rpcs: Vec> = vec![]; for response in responses { // TODO: any way to attach the tried rpcs to the error? it is likely helpful - let (response, rpcs) = response?; + let (status_code, response, rpcs) = response; collected.push(response); collected_rpcs.extend(rpcs.into_iter().filter(|x| { @@ -1186,6 +1194,8 @@ impl Web3ProxyApp { true } })); + + // TODO: what should we do with the status code? check the jsonrpc spec } Ok((collected, collected_rpcs)) @@ -1212,8 +1222,8 @@ impl Web3ProxyApp { } } - /// try to send transactions to the best available rpcs with private mempools - /// if no private rpcs are configured, then some public rpcs are used instead + /// try to send transactions to the best available rpcs with protected/private mempools + /// if no protected rpcs are configured, then some public rpcs are used instead async fn try_send_protected( self: &Arc, authorization: &Arc, @@ -1261,82 +1271,50 @@ impl Web3ProxyApp { async fn proxy_cached_request( self: &Arc, authorization: &Arc, - mut request: JsonRpcRequest, + request: &mut JsonRpcRequest, head_block_num: Option, - ) -> Web3ProxyResult<(JsonRpcForwardedResponse, Vec>)> { + ) -> (StatusCode, JsonRpcForwardedResponse, Vec>) { // TODO: move this code to another module so that its easy to turn this trace logging on in dev trace!("Received request: {:?}", request); - let request_metadata = Arc::new(RequestMetadata::new(request.num_bytes())); + let request_metadata = RequestMetadata::new( + self, + authorization.clone(), + RequestOrMethod::Request(request), + head_block_num.as_ref(), + ) + .await; - let mut kafka_stuff = None; + let (status_code, response) = match self + ._proxy_cached_request(authorization, request, head_block_num, &request_metadata) + .await + { + Ok(x) => (StatusCode::OK, x), + Err(err) => err.into_response_parts(), + }; - if matches!(authorization.checks.proxy_mode, ProxyMode::Debug) { - if let Some(kafka_producer) = self.kafka_producer.clone() { - let kafka_topic = "proxy_cached_request".to_string(); + request_metadata.add_response(&response); - let rpc_secret_key_id = authorization - .checks - .rpc_secret_key_id - .map(|x| x.get()) - .unwrap_or_default(); + // TODO: with parallel request sending, I think there could be a race on this + let rpcs = request_metadata.backend_rpcs_used(); - let kafka_key = rmp_serde::to_vec(&rpc_secret_key_id)?; - - let request_bytes = rmp_serde::to_vec(&request)?; - - let request_hash = Some(keccak256(&request_bytes)); - - let chain_id = self.config.chain_id; - - // another item is added with the response, so initial_capacity is +1 what is needed here - let kafka_headers = OwnedHeaders::new_with_capacity(4) - 
.insert(Header { - key: "request_hash", - value: request_hash.as_ref(), - }) - .insert(Header { - key: "head_block_num", - value: head_block_num.map(|x| x.to_string()).as_ref(), - }) - .insert(Header { - key: "chain_id", - value: Some(&chain_id.to_le_bytes()), - }); - - // save the key and headers for when we log the response - kafka_stuff = Some(( - kafka_topic.clone(), - kafka_key.clone(), - kafka_headers.clone(), - )); - - let f = async move { - let produce_future = kafka_producer.send( - FutureRecord::to(&kafka_topic) - .key(&kafka_key) - .payload(&request_bytes) - .headers(kafka_headers), - Duration::from_secs(0), - ); - - if let Err((err, _)) = produce_future.await { - error!("produce kafka request log: {}", err); - // TODO: re-queue the msg? - } - }; - - tokio::spawn(f); - } - } + (status_code, response, rpcs) + } + /// main logic for proxy_cached_request but in a dedicated function so the try operator is easy to use + async fn _proxy_cached_request( + self: &Arc, + authorization: &Arc, + request: &mut JsonRpcRequest, + head_block_num: Option, + request_metadata: &Arc, + ) -> Web3ProxyResult { // save the id so we can attach it to the response - // TODO: instead of cloning, take the id out? let request_id = request.id.clone(); + // TODO: don't clone let request_method = request.method.clone(); - // TODO: if eth_chainId or net_version, serve those without querying the backend - // TODO: don't clone? + // TODO: serve net_version without querying the backend let response: JsonRpcForwardedResponse = match request_method.as_ref() { // lots of commands are blocked method @ ("db_getHex" @@ -1449,15 +1427,15 @@ impl Web3ProxyApp { .try_proxy_connection( authorization, request, - Some(&request_metadata), + Some(request_metadata), None, None, ) .await? } None => { - // TODO: stats! - // TODO: not synced error? + // TODO: stats even when we error! + // TODO: use Web3ProxyError? dedicated error for no 4337 bundlers return Err(anyhow::anyhow!("no bundler_4337_rpcs available").into()); } }, @@ -1493,26 +1471,19 @@ impl Web3ProxyApp { .try_proxy_connection( authorization, request, - Some(&request_metadata), + Some(request_metadata), None, None, ) .await?; - let mut gas_estimate: U256 = if let Some(gas_estimate) = response.result.take() { - serde_json::from_str(gas_estimate.get()) - .or(Err(Web3ProxyError::GasEstimateNotU256))? - } else { - // i think this is always an error response - let rpcs = request_metadata.backend_requests.lock().clone(); + if let Some(gas_estimate) = response.result.take() { + let mut gas_estimate: U256 = serde_json::from_str(gas_estimate.get()) + .or(Err(Web3ProxyError::GasEstimateNotU256))?; - // TODO! 
save stats - - return Ok((response, rpcs)); - }; - - let gas_increase = - if let Some(gas_increase_percent) = self.config.gas_increase_percent { + let gas_increase = if let Some(gas_increase_percent) = + self.config.gas_increase_percent + { let gas_increase = gas_estimate * gas_increase_percent / U256::from(100); let min_gas_increase = self.config.gas_increase_min.unwrap_or_default(); @@ -1522,9 +1493,12 @@ impl Web3ProxyApp { self.config.gas_increase_min.unwrap_or_default() }; - gas_estimate += gas_increase; + gas_estimate += gas_increase; - JsonRpcForwardedResponse::from_value(json!(gas_estimate), request_id) + JsonRpcForwardedResponse::from_value(json!(gas_estimate), request_id) + } else { + response + } } "eth_getTransactionReceipt" | "eth_getTransactionByHash" => { // try to get the transaction without specifying a min_block_height @@ -1532,8 +1506,8 @@ impl Web3ProxyApp { .balanced_rpcs .try_proxy_connection( authorization, - request.clone(), - Some(&request_metadata), + request, + Some(request_metadata), None, None, ) @@ -1551,7 +1525,7 @@ impl Web3ProxyApp { .try_proxy_connection( authorization, request, - Some(&request_metadata), + Some(request_metadata), Some(&U64::one()), None, ) @@ -1583,7 +1557,7 @@ impl Web3ProxyApp { let mut response = self .try_send_protected( authorization, - &request, + request, request_metadata.clone(), num_public_rpcs, ) @@ -1592,24 +1566,36 @@ impl Web3ProxyApp { // sometimes we get an error that the transaction is already known by our nodes, // that's not really an error. Return the hash like a successful response would. // TODO: move this to a helper function - if let Some(response_error) = response.error.as_ref() { + if let Some(ref response_error) = response.error { if response_error.code == -32000 && (response_error.message == "ALREADY_EXISTS: already known" || response_error.message == "INTERNAL_ERROR: existing tx with same hash") { - // TODO: expect instead of web3_context? let params = request .params + .as_mut() .web3_context("there must be params if we got this far")?; let params = params .as_array() - .web3_context("there must be an array if we got this far")? + .ok_or_else(|| { + Web3ProxyError::BadRequest( + "Unable to get array from params".to_string(), + ) + })? .get(0) - .web3_context("there must be an item if we got this far")? + .ok_or_else(|| { + Web3ProxyError::BadRequest( + "Unable to get item 0 from params".to_string(), + ) + })? 
.as_str() - .web3_context("there must be a string if we got this far")?; + .ok_or_else(|| { + Web3ProxyError::BadRequest( + "Unable to get string from params item 0".to_string(), + ) + })?; let params = Bytes::from_str(params) .expect("there must be Bytes if we got this far"); @@ -1617,6 +1603,7 @@ impl Web3ProxyApp { let rlp = Rlp::new(params.as_ref()); if let Ok(tx) = Transaction::decode(&rlp) { + // TODO: decode earlier and confirm that tx.chain_id (if set) matches self.config.chain_id let tx_hash = json!(tx.hash()); trace!("tx_hash: {:#?}", tx_hash); @@ -1630,17 +1617,16 @@ impl Web3ProxyApp { } // emit transaction count stats - if let Some(salt) = self.config.public_recent_ips_salt.as_ref() { - if let Some(tx_hash) = response.result.clone() { + if let Some(ref salt) = self.config.public_recent_ips_salt { + if let Some(ref tx_hash) = response.result { let now = Utc::now().timestamp(); - let salt = salt.clone(); let app = self.clone(); + let salted_tx_hash = format!("{}:{}", salt, tx_hash); + let f = async move { match app.redis_conn().await { Ok(Some(mut redis_conn)) => { - let salted_tx_hash = format!("{}:{}", salt, tx_hash); - let hashed_tx_hash = Bytes::from(keccak256(salted_tx_hash.as_bytes())); @@ -1685,7 +1671,7 @@ impl Web3ProxyApp { Some(request_id), ), "net_listening" => { - // TODO: only if there are some backends on balanced_rpcs? + // TODO: only true if there are some backends on balanced_rpcs? JsonRpcForwardedResponse::from_value(serde_json::Value::Bool(true), request_id) } "net_peerCount" => JsonRpcForwardedResponse::from_value( @@ -1705,35 +1691,35 @@ impl Web3ProxyApp { || !params.get(0).map(|x| x.is_string()).unwrap_or(false) { // TODO: what error code? - return Ok(( - JsonRpcForwardedResponse::from_str( - "Invalid request", - Some(-32600), - Some(request_id), - ), - vec![], - )); - } - - let param = Bytes::from_str( - params[0] - .as_str() - .ok_or(Web3ProxyError::ParseBytesError(None)) - .web3_context("parsing params 0 into str then bytes")?, - ) - .map_err(|x| { - trace!("bad request: {:?}", x); - Web3ProxyError::BadRequest( - "param 0 could not be read as H256".to_string(), + // TODO: use Web3ProxyError::BadRequest + JsonRpcForwardedResponse::from_str( + "Invalid request", + Some(-32600), + Some(request_id), ) - })?; + } else { + // TODO: BadRequest instead of web3_context + let param = Bytes::from_str( + params[0] + .as_str() + .ok_or(Web3ProxyError::ParseBytesError(None)) + .web3_context("parsing params 0 into str then bytes")?, + ) + .map_err(|x| { + trace!("bad request: {:?}", x); + Web3ProxyError::BadRequest( + "param 0 could not be read as H256".to_string(), + ) + })?; - let hash = H256::from(keccak256(param)); + let hash = H256::from(keccak256(param)); - JsonRpcForwardedResponse::from_value(json!(hash), request_id) + JsonRpcForwardedResponse::from_value(json!(hash), request_id) + } } _ => { // TODO: this needs the correct error code in the response + // TODO: Web3ProxyError::BadRequest instead? JsonRpcForwardedResponse::from_str( "invalid request", Some(StatusCode::BAD_REQUEST.as_u16().into()), @@ -1759,6 +1745,9 @@ impl Web3ProxyApp { .or(self.balanced_rpcs.head_block_num()) .ok_or(Web3ProxyError::NoServersSynced)?; + // TODO: don't clone. this happens way too much. maybe &mut? + let mut request = request.clone(); + // we do this check before checking caches because it might modify the request params // TODO: add a stat for archive vs full since they should probably cost different // TODO: this cache key can be rather large. is that okay? 
@@ -1867,7 +1856,7 @@ impl Web3ProxyApp { .balanced_rpcs .try_proxy_connection( &authorization, - request, + &request, Some(&request_metadata), from_block_num.as_ref(), to_block_num.as_ref(), @@ -1888,7 +1877,7 @@ impl Web3ProxyApp { self.balanced_rpcs .try_proxy_connection( &authorization, - request, + &request, Some(&request_metadata), None, None, @@ -1897,7 +1886,7 @@ impl Web3ProxyApp { } }; - // since this data came likely out of a cache, the id is not going to match + // since this data likely came out of a cache, the response.id is not going to match the request.id // replace the id with our request's id. response.id = request_id; @@ -1905,52 +1894,7 @@ impl Web3ProxyApp { } }; - // save the rpcs so they can be included in a response header - let rpcs = request_metadata.backend_requests.lock().clone(); - - // send stats used for accounting and graphs - if let Some(stat_sender) = self.stat_sender.as_ref() { - let response_stat = RpcQueryStats::new( - Some(request_method), - authorization.clone(), - request_metadata, - response.num_bytes(), - ); - - stat_sender - .send_async(response_stat.into()) - .await - .map_err(Web3ProxyError::SendAppStatError)?; - } - - // send debug info as a kafka log - if let Some((kafka_topic, kafka_key, kafka_headers)) = kafka_stuff { - let kafka_producer = self - .kafka_producer - .clone() - .expect("if headers are set, producer must exist"); - - let response_bytes = - rmp_serde::to_vec(&response).web3_context("failed msgpack serialize response")?; - - let f = async move { - let produce_future = kafka_producer.send( - FutureRecord::to(&kafka_topic) - .key(&kafka_key) - .payload(&response_bytes) - .headers(kafka_headers), - Duration::from_secs(0), - ); - - if let Err((err, _)) = produce_future.await { - error!("produce kafka response log: {}", err); - } - }; - - tokio::spawn(f); - } - - Ok((response, rpcs)) + Ok(response) } } diff --git a/web3_proxy/src/app/ws.rs b/web3_proxy/src/app/ws.rs index 4b57c8a8..db348577 100644 --- a/web3_proxy/src/app/ws.rs +++ b/web3_proxy/src/app/ws.rs @@ -1,56 +1,55 @@ //! 
Websocket-specific functions for the Web3ProxyApp use super::Web3ProxyApp; -use crate::frontend::authorization::{Authorization, RequestMetadata}; -use crate::frontend::errors::{Web3ProxyError, Web3ProxyErrorContext, Web3ProxyResult}; +use crate::frontend::authorization::{Authorization, RequestMetadata, RequestOrMethod}; +use crate::frontend::errors::{Web3ProxyError, Web3ProxyResult}; use crate::jsonrpc::JsonRpcForwardedResponse; use crate::jsonrpc::JsonRpcRequest; use crate::rpcs::transactions::TxStatus; -use crate::stats::RpcQueryStats; use axum::extract::ws::Message; use ethers::prelude::U64; use futures::future::AbortHandle; use futures::future::Abortable; use futures::stream::StreamExt; -use log::{trace, warn}; +use log::trace; use serde_json::json; use std::sync::atomic::{self, AtomicUsize}; use std::sync::Arc; use tokio_stream::wrappers::{BroadcastStream, WatchStream}; impl Web3ProxyApp { - // TODO: #[measure([ErrorCount, HitCount, ResponseTime, Throughput])] pub async fn eth_subscribe<'a>( self: &'a Arc, authorization: Arc, - request_json: JsonRpcRequest, + jsonrpc_request: JsonRpcRequest, subscription_count: &'a AtomicUsize, // TODO: taking a sender for Message instead of the exact json we are planning to send feels wrong, but its easier for now response_sender: flume::Sender, ) -> Web3ProxyResult<(AbortHandle, JsonRpcForwardedResponse)> { - // TODO: this is not efficient - let request_bytes = serde_json::to_string(&request_json) - .web3_context("finding request size")? - .len(); - - let request_metadata = Arc::new(RequestMetadata::new(request_bytes)); + let request_metadata = RequestMetadata::new( + self, + authorization.clone(), + RequestOrMethod::Request(&jsonrpc_request), + None, + ) + .await; let (subscription_abort_handle, subscription_registration) = AbortHandle::new_pair(); // TODO: this only needs to be unique per connection. we don't need it globably unique + // TODO: have a max number of subscriptions per key/ip. have a global max number of subscriptions? how should this be calculated? let subscription_id = subscription_count.fetch_add(1, atomic::Ordering::SeqCst); let subscription_id = U64::from(subscription_id); // save the id so we can use it in the response - let id = request_json.id.clone(); + let id = jsonrpc_request.id.clone(); // TODO: calling json! on every request is probably not fast. but we can only match against // TODO: i think we need a stricter EthSubscribeRequest type that JsonRpcRequest can turn into - match request_json.params.as_ref() { + match jsonrpc_request.params.as_ref() { Some(x) if x == &json!(["newHeads"]) => { - let authorization = authorization.clone(); let head_block_receiver = self.watch_consensus_head_receiver.clone(); - let stat_sender = self.stat_sender.clone(); + let app = self.clone(); trace!("newHeads subscription {:?}", subscription_id); tokio::spawn(async move { @@ -66,8 +65,13 @@ impl Web3ProxyApp { continue; }; - // TODO: what should the payload for RequestMetadata be? - let request_metadata = Arc::new(RequestMetadata::new(0)); + let subscription_request_metadata = RequestMetadata::new( + &app, + authorization.clone(), + RequestOrMethod::Method("eth_subscribe(newHeads)", 0), + Some(new_head.number()), + ) + .await; // TODO: make a struct for this? 
using our JsonRpcForwardedResponse won't work because it needs an id let response_json = json!({ @@ -83,33 +87,20 @@ impl Web3ProxyApp { let response_str = serde_json::to_string(&response_json) .expect("this should always be valid json"); - // we could use response.num_bytes() here, but since we already have the string, this is easier + // we could use JsonRpcForwardedResponseEnum::num_bytes() here, but since we already have the string, this is easier let response_bytes = response_str.len(); // TODO: do clients support binary messages? + // TODO: can we check a content type header? let response_msg = Message::Text(response_str); if response_sender.send_async(response_msg).await.is_err() { + // TODO: increment error_response? i don't think so. i think this will happen once every time a client disconnects. // TODO: cancel this subscription earlier? select on head_block_receiver.next() and an abort handle? break; }; - if let Some(stat_sender) = stat_sender.as_ref() { - let response_stat = RpcQueryStats::new( - Some("eth_subscription(newHeads)".to_string()), - authorization.clone(), - request_metadata.clone(), - response_bytes, - ); - - if let Err(err) = stat_sender.send_async(response_stat.into()).await { - // TODO: what should we do? - warn!( - "stat_sender failed inside newPendingTransactions: {:?}", - err - ); - } - } + subscription_request_metadata.add_response(response_bytes); } trace!("closed newHeads subscription {:?}", subscription_id); @@ -117,8 +108,7 @@ impl Web3ProxyApp { } Some(x) if x == &json!(["newPendingTransactions"]) => { let pending_tx_receiver = self.pending_tx_sender.subscribe(); - let stat_sender = self.stat_sender.clone(); - let authorization = authorization.clone(); + let app = self.clone(); let mut pending_tx_receiver = Abortable::new( BroadcastStream::new(pending_tx_receiver), @@ -133,7 +123,13 @@ impl Web3ProxyApp { // TODO: do something with this handle? tokio::spawn(async move { while let Some(Ok(new_tx_state)) = pending_tx_receiver.next().await { - let request_metadata = Arc::new(RequestMetadata::new(0)); + let subscription_request_metadata = RequestMetadata::new( + &app, + authorization.clone(), + RequestOrMethod::Method("eth_subscribe(newPendingTransactions)", 0), + None, + ) + .await; let new_tx = match new_tx_state { TxStatus::Pending(tx) => tx, @@ -154,9 +150,11 @@ impl Web3ProxyApp { let response_str = serde_json::to_string(&response_json) .expect("this should always be valid json"); - // we could use response.num_bytes() here, but since we already have the string, this is easier + // TODO: test that this len is the same as JsonRpcForwardedResponseEnum.num_bytes() let response_bytes = response_str.len(); + subscription_request_metadata.add_response(response_bytes); + // TODO: do clients support binary messages? let response_msg = Message::Text(response_str); @@ -164,23 +162,6 @@ impl Web3ProxyApp { // TODO: cancel this subscription earlier? select on head_block_receiver.next() and an abort handle? break; }; - - if let Some(stat_sender) = stat_sender.as_ref() { - let response_stat = RpcQueryStats::new( - Some("eth_subscription(newPendingTransactions)".to_string()), - authorization.clone(), - request_metadata.clone(), - response_bytes, - ); - - if let Err(err) = stat_sender.send_async(response_stat.into()).await { - // TODO: what should we do? 
- warn!( - "stat_sender failed inside newPendingTransactions: {:?}", - err - ); - } - } } trace!( @@ -191,9 +172,8 @@ impl Web3ProxyApp { } Some(x) if x == &json!(["newPendingFullTransactions"]) => { // TODO: too much copy/pasta with newPendingTransactions - let authorization = authorization.clone(); let pending_tx_receiver = self.pending_tx_sender.subscribe(); - let stat_sender = self.stat_sender.clone(); + let app = self.clone(); let mut pending_tx_receiver = Abortable::new( BroadcastStream::new(pending_tx_receiver), @@ -208,7 +188,13 @@ impl Web3ProxyApp { // TODO: do something with this handle? tokio::spawn(async move { while let Some(Ok(new_tx_state)) = pending_tx_receiver.next().await { - let request_metadata = Arc::new(RequestMetadata::new(0)); + let subscription_request_metadata = RequestMetadata::new( + &app, + authorization.clone(), + RequestOrMethod::Method("eth_subscribe(newPendingFullTransactions)", 0), + None, + ) + .await; let new_tx = match new_tx_state { TxStatus::Pending(tx) => tx, @@ -227,12 +213,11 @@ impl Web3ProxyApp { }, }); + subscription_request_metadata.add_response(&response_json); + let response_str = serde_json::to_string(&response_json) .expect("this should always be valid json"); - // we could use response.num_bytes() here, but since we already have the string, this is easier - let response_bytes = response_str.len(); - // TODO: do clients support binary messages? let response_msg = Message::Text(response_str); @@ -240,23 +225,6 @@ impl Web3ProxyApp { // TODO: cancel this subscription earlier? select on head_block_receiver.next() and an abort handle? break; }; - - if let Some(stat_sender) = stat_sender.as_ref() { - let response_stat = RpcQueryStats::new( - Some("eth_subscription(newPendingFullTransactions)".to_string()), - authorization.clone(), - request_metadata.clone(), - response_bytes, - ); - - if let Err(err) = stat_sender.send_async(response_stat.into()).await { - // TODO: what should we do? - warn!( - "stat_sender failed inside newPendingFullTransactions: {:?}", - err - ); - } - } } trace!( @@ -267,9 +235,8 @@ impl Web3ProxyApp { } Some(x) if x == &json!(["newPendingRawTransactions"]) => { // TODO: too much copy/pasta with newPendingTransactions - let authorization = authorization.clone(); let pending_tx_receiver = self.pending_tx_sender.subscribe(); - let stat_sender = self.stat_sender.clone(); + let app = self.clone(); let mut pending_tx_receiver = Abortable::new( BroadcastStream::new(pending_tx_receiver), @@ -284,7 +251,13 @@ impl Web3ProxyApp { // TODO: do something with this handle? tokio::spawn(async move { while let Some(Ok(new_tx_state)) = pending_tx_receiver.next().await { - let request_metadata = Arc::new(RequestMetadata::new(0)); + let subscription_request_metadata = RequestMetadata::new( + &app, + authorization.clone(), + "eth_subscribe(newPendingRawTransactions)", + None, + ) + .await; let new_tx = match new_tx_state { TxStatus::Pending(tx) => tx, @@ -317,22 +290,7 @@ impl Web3ProxyApp { break; }; - if let Some(stat_sender) = stat_sender.as_ref() { - let response_stat = RpcQueryStats::new( - Some("eth_subscription(newPendingRawTransactions)".to_string()), - authorization.clone(), - request_metadata.clone(), - response_bytes, - ); - - if let Err(err) = stat_sender.send_async(response_stat.into()).await { - // TODO: what should we do? 
- warn!( - "stat_sender failed inside newPendingRawTransactions: {:?}", - err - ); - } - } + subscription_request_metadata.add_response(response_bytes); } trace!( @@ -348,19 +306,7 @@ impl Web3ProxyApp { let response = JsonRpcForwardedResponse::from_value(json!(subscription_id), id); - if let Some(stat_sender) = self.stat_sender.as_ref() { - let response_stat = RpcQueryStats::new( - Some(request_json.method.clone()), - authorization.clone(), - request_metadata, - response.num_bytes(), - ); - - if let Err(err) = stat_sender.send_async(response_stat.into()).await { - // TODO: what should we do? - warn!("stat_sender failed inside websocket: {:?}", err); - } - } + request_metadata.add_response(&response); // TODO: make a `SubscriptonHandle(AbortHandle, JoinHandle)` struct? Ok((subscription_abort_handle, response)) diff --git a/web3_proxy/src/bin/wait_for_sync.rs b/web3_proxy/src/bin/wait_for_sync.rs index a44a377c..0252f26a 100644 --- a/web3_proxy/src/bin/wait_for_sync.rs +++ b/web3_proxy/src/bin/wait_for_sync.rs @@ -60,13 +60,12 @@ async fn main() -> anyhow::Result<()> { .context("unknown chain id for check_url")?; if let Some(chain_id) = cli_config.chain_id { - if chain_id != check_id { - return Err(anyhow::anyhow!( - "chain_id of check_url is wrong! Need {}. Found {}", - chain_id, - check_id, - )); - } + anyhow::ensure!( + chain_id == check_id, + "chain_id of check_url is wrong! Need {}. Found {}", + chain_id, + check_id, + ); } let compare_url: String = match cli_config.compare_url { @@ -93,13 +92,12 @@ async fn main() -> anyhow::Result<()> { .await .context("unknown chain id for compare_url")?; - if check_id != compare_id { - return Err(anyhow::anyhow!( - "chain_id does not match! Need {}. Found {}", - check_id, - compare_id, - )); - } + anyhow::ensure!( + check_id == compare_id, + "chain_id does not match! Need {}. Found {}", + check_id, + compare_id, + ); // start ids at 2 because id 1 was checking the chain id let counter = AtomicU32::new(2); diff --git a/web3_proxy/src/bin/web3_proxy_cli/main.rs b/web3_proxy/src/bin/web3_proxy_cli/main.rs index d3124a43..53ba221d 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/main.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/main.rs @@ -38,6 +38,13 @@ use web3_proxy::{ config::TopConfig, }; +#[cfg(feature = "mimalloc")] +use mimalloc::MiMalloc; + +#[cfg(feature = "mimalloc")] +#[global_allocator] +static GLOBAL: MiMalloc = MiMalloc; + #[cfg(feature = "deadlock")] use {parking_lot::deadlock, std::thread, tokio::time::Duration}; @@ -120,10 +127,10 @@ fn main() -> anyhow::Result<()> { // if RUST_LOG isn't set, configure a default // TODO: is there a better way to do this? 
- #[cfg(tokio_console)] + #[cfg(feature = "tokio_console")] console_subscriber::init(); - #[cfg(not(tokio_console))] + #[cfg(not(feature = "tokio_console"))] let rust_log = match std::env::var("RUST_LOG") { Ok(x) => x, Err(_) => match std::env::var("WEB3_PROXY_TRACE").map(|x| x == "true") { @@ -202,7 +209,6 @@ fn main() -> anyhow::Result<()> { (None, None) }; - #[cfg(not(tokio_console))] { let logger = env_logger::builder().parse_filters(&rust_log).build(); @@ -267,9 +273,6 @@ fn main() -> anyhow::Result<()> { } // set up tokio's async runtime - #[cfg(tokio_uring)] - let mut rt_builder = tokio_uring::Builder::new_multi_thread(); - #[cfg(not(tokio_uring))] let mut rt_builder = runtime::Builder::new_multi_thread(); rt_builder.enable_all(); @@ -278,7 +281,7 @@ fn main() -> anyhow::Result<()> { rt_builder.worker_threads(cli_config.workers); } - if let Some(top_config) = top_config.as_ref() { + if let Some(ref top_config) = top_config { let chain_id = top_config.app.chain_id; rt_builder.thread_name_fn(move || { diff --git a/web3_proxy/src/bin/web3_proxy_cli/migrate_stats_to_v2.rs b/web3_proxy/src/bin/web3_proxy_cli/migrate_stats_to_v2.rs index 17ad9370..a933a86d 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/migrate_stats_to_v2.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/migrate_stats_to_v2.rs @@ -1,4 +1,4 @@ -use anyhow::Context; +use anyhow::{anyhow, Context}; use argh::FromArgs; use entities::{rpc_accounting, rpc_key}; use futures::stream::FuturesUnordered; @@ -9,17 +9,17 @@ use migration::sea_orm::{ ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter, QuerySelect, UpdateResult, }; use migration::{Expr, Value}; -use std::net::{IpAddr, Ipv4Addr}; +use parking_lot::Mutex; use std::num::NonZeroU64; use std::sync::Arc; use tokio::sync::broadcast; use tokio::time::Instant; -use web3_proxy::app::{AuthorizationChecks, BILLING_PERIOD_SECONDS}; +use ulid::Ulid; +use web3_proxy::app::BILLING_PERIOD_SECONDS; use web3_proxy::config::TopConfig; -use web3_proxy::frontend::authorization::{ - Authorization, AuthorizationType, RequestMetadata, RpcSecretKey, -}; -use web3_proxy::stats::{RpcQueryStats, StatBuffer}; +use web3_proxy::frontend::authorization::{Authorization, RequestMetadata, RpcSecretKey}; +use web3_proxy::rpcs::one::Web3Rpc; +use web3_proxy::stats::StatBuffer; #[derive(FromArgs, PartialEq, Eq, Debug)] /// Migrate towards influxdb and rpc_accounting_v2 from rpc_accounting @@ -67,28 +67,28 @@ impl MigrateStatsToV2 { }; // Spawn the stat-sender - let stat_sender = if let Some(emitter_spawn) = StatBuffer::try_spawn( - top_config.app.chain_id, + let emitter_spawn = StatBuffer::try_spawn( + BILLING_PERIOD_SECONDS, top_config .app .influxdb_bucket .clone() .context("No influxdb bucket was provided")?, + top_config.app.chain_id, Some(db_conn.clone()), + 30, influxdb_client.clone(), None, - 30, - 1, - BILLING_PERIOD_SECONDS, rpc_account_shutdown_recevier, - )? { - // since the database entries are used for accounting, we want to be sure everything is saved before exiting - important_background_handles.push(emitter_spawn.background_handle); + 1, + ) + .context("Error spawning stat buffer")? + .context("No stat buffer spawned. 
Maybe missing influx or db credentials?")?; - Some(emitter_spawn.stat_sender) - } else { - None - }; + // since the database entries are used for accounting, we want to be sure everything is saved before exiting + important_background_handles.push(emitter_spawn.background_handle); + + let stat_sender = emitter_spawn.stat_sender; let migration_timestamp = chrono::offset::Utc::now(); @@ -110,7 +110,10 @@ impl MigrateStatsToV2 { // (2) Create request metadata objects to match the old data // Iterate through all old rows, and put them into the above objects. for x in old_records.iter() { - let authorization_checks = match x.rpc_key_id { + let mut authorization = Authorization::internal(None) + .context("failed creating internal authorization")?; + + match x.rpc_key_id { Some(rpc_key_id) => { let rpc_key_obj = rpc_key::Entity::find() .filter(rpc_key::Column::Id.eq(rpc_key_id)) @@ -118,34 +121,16 @@ impl MigrateStatsToV2 { .await? .context("Could not find rpc_key_obj for the given rpc_key_id")?; - // TODO: Create authrization - // We can probably also randomly generate this, as we don't care about the user (?) - AuthorizationChecks { - user_id: rpc_key_obj.user_id, - rpc_secret_key: Some(RpcSecretKey::Uuid(rpc_key_obj.secret_key)), - rpc_secret_key_id: Some( - NonZeroU64::new(rpc_key_id) - .context("Could not use rpc_key_id to create a u64")?, - ), - ..Default::default() - } + authorization.checks.user_id = rpc_key_obj.user_id; + authorization.checks.rpc_secret_key = + Some(RpcSecretKey::Uuid(rpc_key_obj.secret_key)); + authorization.checks.rpc_secret_key_id = + NonZeroU64::try_from(rpc_key_id).ok(); } None => Default::default(), }; - let authorization_type = AuthorizationType::Internal; - let authorization = Arc::new( - Authorization::try_new( - authorization_checks, - None, - IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), - None, - None, - None, - authorization_type, - ) - .context("Initializing Authorization Struct was not successful")?, - ); + let authorization = Arc::new(authorization); // It will be like a fork basically (to simulate getting multiple single requests ...) // Iterate through all frontend requests @@ -178,46 +163,38 @@ impl MigrateStatsToV2 { // Add module at the last step to include for any remained that we missed ... (?) 
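The two chained .context(...)? calls on StatBuffer::try_spawn above work because anyhow::Context is implemented for Option as well as Result: the first ? unwraps the outer Result, the second turns a None into an error. A sketch with a hypothetical stand-in for try_spawn:

    use anyhow::Context;

    // hypothetical stand-in; the real StatBuffer::try_spawn presumably returns
    // something like anyhow::Result<Option<SpawnedStatBuffer>>
    fn try_spawn_stub() -> anyhow::Result<Option<&'static str>> {
        Ok(Some("stat sender"))
    }

    fn demo() -> anyhow::Result<()> {
        let spawned = try_spawn_stub()
            // annotates an Err from the spawn attempt
            .context("Error spawning stat buffer")?
            // converts a None (nothing spawned) into an Err
            .context("No stat buffer spawned. Maybe missing influx or db credentials?")?;

        println!("{spawned}");

        Ok(())
    }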
- // TODO: Create RequestMetadata + let backend_rpcs: Vec<_> = (0..int_backend_requests) + .map(|_| Arc::new(Web3Rpc::default())) + .collect(); + + let request_ulid = Ulid::new(); + + // Create RequestMetadata let request_metadata = RequestMetadata { - start_instant: Instant::now(), // This is overwritten later on - request_bytes: int_request_bytes, // Get the mean of all the request bytes archive_request: x.archive_request.into(), - backend_requests: Default::default(), // This is not used, instead we modify the field later - no_servers: 0.into(), // This is not relevant in the new version + authorization: Some(authorization.clone()), + backend_requests: Mutex::new(backend_rpcs), error_response: x.error_response.into(), + // debug data is in kafka, not mysql or influx + kafka_debug_logger: None, + method: x.method.clone(), + // This is not relevant in the new version + no_servers: 0.into(), + // Get the mean of all the request bytes + request_bytes: int_request_bytes as usize, response_bytes: int_response_bytes.into(), + // We did not initially record this data + response_from_backup_rpc: false.into(), + response_timestamp: x.period_datetime.timestamp().into(), response_millis: int_response_millis.into(), - // We just don't have this data - response_from_backup_rpc: false.into(), // I think we did not record this back then // Default::default() + // This is overwritten later on + start_instant: Instant::now(), + stat_sender: Some(stat_sender.clone()), + request_ulid, }; - // (3) Send through a channel to a stat emitter - // Send it to the stats sender - if let Some(stat_sender_ref) = stat_sender.as_ref() { - // info!("Method is: {:?}", x.clone().method); - let mut response_stat = RpcQueryStats::new( - x.clone().method, - authorization.clone(), - Arc::new(request_metadata), - (int_response_bytes) - .try_into() - .context("sum bytes average is not calculated properly")?, - ); - // Modify the timestamps .. - response_stat.modify_struct( - int_response_millis, - x.period_datetime.timestamp(), - int_backend_requests, - ); - // info!("Sending stats: {:?}", response_stat); - stat_sender_ref - // .send(response_stat.into()) - .send_async(response_stat.into()) - .await - .context("stat_sender sending response_stat")?; - } else { - panic!("Stat sender was not spawned!"); + if let Some(x) = request_metadata.try_send_stat()? { + return Err(anyhow!("failed saving stat! {:?}", x)); } } } diff --git a/web3_proxy/src/bin/web3_proxy_cli/proxyd.rs b/web3_proxy/src/bin/web3_proxy_cli/proxyd.rs index c70d1816..37edcafb 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/proxyd.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/proxyd.rs @@ -195,7 +195,7 @@ async fn run( // start the frontend port let frontend_handle = tokio::spawn(frontend::serve( app_frontend_port, - spawned_app.app.clone(), + spawned_app.app, frontend_shutdown_receiver, frontend_shutdown_complete_sender, )); @@ -417,17 +417,14 @@ mod tests { let prometheus_port = 0; let shutdown_sender = shutdown_sender.clone(); - tokio::spawn(async move { - run( - top_config, - None, - frontend_port, - prometheus_port, - 2, - shutdown_sender, - ) - .await - }) + tokio::spawn(run( + top_config, + None, + frontend_port, + prometheus_port, + 2, + shutdown_sender, + )) }; // TODO: do something to the node. 
query latest block, mine another block, query again diff --git a/web3_proxy/src/bin/web3_proxy_cli/rpc_accounting.rs b/web3_proxy/src/bin/web3_proxy_cli/rpc_accounting.rs index 6b73238b..2fb84848 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/rpc_accounting.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/rpc_accounting.rs @@ -108,11 +108,9 @@ impl RpcAccountingSubCommand { .all(db_conn) .await?; - if u_keys.is_empty() { - return Err(anyhow::anyhow!("no user keys")); - } + anyhow::ensure!(!u_keys.is_empty(), "no user keys"); - let u_key_ids: Vec<_> = u_keys.iter().map(|x| x.id).collect(); + let u_key_ids: Vec<_> = u_keys.into_iter().map(|x| x.id).collect(); condition = condition.add(rpc_accounting::Column::RpcKeyId.is_in(u_key_ids)); } diff --git a/web3_proxy/src/bin/web3_proxy_cli/sentryd/compare.rs b/web3_proxy/src/bin/web3_proxy_cli/sentryd/compare.rs index e5225fbc..d8c257dd 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/sentryd/compare.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/sentryd/compare.rs @@ -217,13 +217,12 @@ async fn check_rpc( .await .context(format!("awaiting response from {}", rpc))?; - if !response.status().is_success() { - return Err(anyhow::anyhow!( - "bad response from {}: {}", - rpc, - response.status(), - )); - } + anyhow::ensure!( + response.status().is_success(), + "bad response from {}: {}", + rpc, + response.status(), + ); let body = response .text() diff --git a/web3_proxy/src/bin/web3_proxy_cli/sentryd/mod.rs b/web3_proxy/src/bin/web3_proxy_cli/sentryd/mod.rs index ed8274b3..5e0af642 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/sentryd/mod.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/sentryd/mod.rs @@ -141,7 +141,7 @@ impl SentrydSubCommand { None, ); - if let Some(pagerduty_async) = pagerduty_async.as_ref() { + if let Some(ref pagerduty_async) = pagerduty_async { info!( "sending to pagerduty: {:#}", serde_json::to_string_pretty(&alert)? diff --git a/web3_proxy/src/bin/web3_proxy_cli/user_import.rs b/web3_proxy/src/bin/web3_proxy_cli/user_import.rs index 9e6545a3..a9875b3d 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/user_import.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/user_import.rs @@ -32,12 +32,11 @@ impl UserImportSubCommand { pub async fn main(self, db_conn: &DatabaseConnection) -> anyhow::Result<()> { let import_dir = Path::new(&self.input_dir); - if !import_dir.exists() { - return Err(anyhow::anyhow!( - "import dir ({}) does not exist!", - import_dir.to_string_lossy() - )); - } + anyhow::ensure!( + import_dir.exists(), + "import dir ({}) does not exist!", + import_dir.to_string_lossy() + ); let user_glob_path = import_dir.join(format!("{}-users-*.json", self.export_timestamp)); @@ -180,10 +179,7 @@ impl UserImportSubCommand { .await? { // make sure it belongs to the mapped user - if existing_rk.user_id != mapped_id { - // TODO: error or import the rest? - return Err(anyhow::anyhow!("unexpected user id")); - } + anyhow::ensure!(existing_rk.user_id == mapped_id, "unexpected user id"); // the key exists under the expected user. we are good to continue } else { diff --git a/web3_proxy/src/frontend/admin.rs b/web3_proxy/src/frontend/admin.rs index 72a49b67..c3ddf453 100644 --- a/web3_proxy/src/frontend/admin.rs +++ b/web3_proxy/src/frontend/admin.rs @@ -108,8 +108,8 @@ pub async fn admin_login_get( let login_domain = app .config .login_domain - .clone() - .unwrap_or_else(|| "llamanodes.com".to_string()); + .as_deref() + .unwrap_or("llamanodes.com"); // Also there must basically be a token, that says that one admin logins _as a user_. 
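The login_domain change just above swaps a clone-plus-allocation for a borrow: Option<String>::as_deref() yields an Option<&str>, so the fallback can be a 'static literal. A small sketch of the pattern, assuming an Option<String> config field:

    fn login_domain(configured: &Option<String>) -> &str {
        // before: configured.clone().unwrap_or_else(|| "llamanodes.com".to_string())
        //         allocated a fresh String on every call
        // after:  borrow the configured value, or fall back to the literal
        configured.as_deref().unwrap_or("llamanodes.com")
    }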
// I'm not yet fully sure how to handle with that logic specifically ... diff --git a/web3_proxy/src/frontend/authorization.rs b/web3_proxy/src/frontend/authorization.rs index 97672da5..5d951956 100644 --- a/web3_proxy/src/frontend/authorization.rs +++ b/web3_proxy/src/frontend/authorization.rs @@ -3,29 +3,38 @@ use super::errors::{Web3ProxyError, Web3ProxyErrorContext, Web3ProxyResult}; use super::rpc_proxy_ws::ProxyMode; use crate::app::{AuthorizationChecks, Web3ProxyApp, APP_USER_AGENT}; +use crate::jsonrpc::{JsonRpcForwardedResponse, JsonRpcRequest}; use crate::rpcs::one::Web3Rpc; +use crate::stats::{AppStat, BackendRequests, RpcQueryStats}; use crate::user_token::UserBearerToken; use axum::headers::authorization::Bearer; use axum::headers::{Header, Origin, Referer, UserAgent}; use chrono::Utc; +use core::fmt; use deferred_rate_limiter::DeferredRateLimitResult; +use derive_more::From; use entities::sea_orm_active_enums::TrackingLevel; use entities::{balance, login, rpc_key, user, user_tier}; -use ethers::types::Bytes; +use ethers::types::{Bytes, U64}; use ethers::utils::keccak256; use futures::TryFutureExt; use hashbrown::HashMap; use http::HeaderValue; use ipnet::IpNet; -use log::{error, warn}; +use log::{error, trace, warn}; use migration::sea_orm::{ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter}; -use parking_lot::Mutex; +use rdkafka::message::{Header as KafkaHeader, OwnedHeaders as KafkaOwnedHeaders, OwnedMessage}; +use rdkafka::producer::{FutureProducer, FutureRecord}; +use rdkafka::util::Timeout as KafkaTimeout; use redis_rate_limiter::redis::AsyncCommands; use redis_rate_limiter::RedisRateLimitResult; use std::fmt::Display; -use std::sync::atomic::{AtomicBool, AtomicU64}; +use std::mem; +use std::sync::atomic::{self, AtomicBool, AtomicI64, AtomicU64, AtomicUsize}; +use std::time::Duration; use std::{net::IpAddr, str::FromStr, sync::Arc}; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; +use tokio::task::JoinHandle; use tokio::time::Instant; use ulid::Ulid; use uuid::Uuid; @@ -70,37 +79,448 @@ pub struct Authorization { pub authorization_type: AuthorizationType, } +pub struct KafkaDebugLogger { + topic: String, + key: Vec, + headers: KafkaOwnedHeaders, + producer: FutureProducer, + num_requests: AtomicUsize, + num_responses: AtomicUsize, +} + +impl fmt::Debug for KafkaDebugLogger { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("KafkaDebugLogger") + .field("topic", &self.topic) + .finish_non_exhaustive() + } +} + +type KafkaLogResult = Result<(i32, i64), (rdkafka::error::KafkaError, OwnedMessage)>; + +impl KafkaDebugLogger { + fn try_new( + app: &Web3ProxyApp, + authorization: Arc, + head_block_num: Option<&U64>, + kafka_topic: &str, + request_ulid: Ulid, + ) -> Option> { + let kafka_producer = app.kafka_producer.clone()?; + + let kafka_topic = kafka_topic.to_string(); + + let rpc_secret_key_id = authorization + .checks + .rpc_secret_key_id + .map(|x| x.get()) + .unwrap_or_default(); + + let kafka_key = + rmp_serde::to_vec(&rpc_secret_key_id).expect("ids should always serialize with rmp"); + + let chain_id = app.config.chain_id; + + let head_block_num = head_block_num + .copied() + .or_else(|| app.balanced_rpcs.head_block_num()); + + // TODO: would be nice to have the block hash too + + // another item is added with the response, so initial_capacity is +1 what is needed here + let kafka_headers = KafkaOwnedHeaders::new_with_capacity(6) + .insert(KafkaHeader { + key: "rpc_secret_key_id", + value: authorization + .checks + 
.rpc_secret_key_id + .map(|x| x.to_string()) + .as_ref(), + }) + .insert(KafkaHeader { + key: "ip", + value: Some(&authorization.ip.to_string()), + }) + .insert(KafkaHeader { + key: "request_ulid", + value: Some(&request_ulid.to_string()), + }) + .insert(KafkaHeader { + key: "head_block_num", + value: head_block_num.map(|x| x.to_string()).as_ref(), + }) + .insert(KafkaHeader { + key: "chain_id", + value: Some(&chain_id.to_le_bytes()), + }); + + // save the key and headers for when we log the response + let x = Self { + topic: kafka_topic, + key: kafka_key, + headers: kafka_headers, + producer: kafka_producer, + num_requests: 0.into(), + num_responses: 0.into(), + }; + + let x = Arc::new(x); + + Some(x) + } + + fn background_log(&self, payload: Vec) -> JoinHandle { + let topic = self.topic.clone(); + let key = self.key.clone(); + let producer = self.producer.clone(); + let headers = self.headers.clone(); + + let f = async move { + let record = FutureRecord::to(&topic) + .key(&key) + .payload(&payload) + .headers(headers); + + let produce_future = + producer.send(record, KafkaTimeout::After(Duration::from_secs(5 * 60))); + + let kafka_response = produce_future.await; + + if let Err((err, msg)) = kafka_response.as_ref() { + error!("produce kafka request: {} - {:?}", err, msg); + // TODO: re-queue the msg? log somewhere else like a file on disk? + // TODO: this is bad and should probably trigger an alarm + }; + + kafka_response + }; + + tokio::spawn(f) + } + + /// for opt-in debug usage, log the request to kafka + /// TODO: generic type for request + pub fn log_debug_request(&self, request: &JsonRpcRequest) -> JoinHandle { + // TODO: is rust message pack a good choice? try rkyv instead + let payload = + rmp_serde::to_vec(&request).expect("requests should always serialize with rmp"); + + self.num_requests.fetch_add(1, atomic::Ordering::SeqCst); + + self.background_log(payload) + } + + pub fn log_debug_response(&self, response: &R) -> JoinHandle + where + R: serde::Serialize, + { + let payload = + rmp_serde::to_vec(&response).expect("requests should always serialize with rmp"); + + self.num_responses.fetch_add(1, atomic::Ordering::SeqCst); + + self.background_log(payload) + } +} + #[derive(Debug)] pub struct RequestMetadata { - pub start_instant: tokio::time::Instant, - pub request_bytes: u64, - // TODO: do we need atomics? seems like we should be able to pass a &mut around - // TODO: "archive" isn't really a boolean. + /// TODO: set archive_request during the new instead of after + /// TODO: this is more complex than "requires a block older than X height". different types of data can be pruned differently pub archive_request: AtomicBool, + + pub authorization: Option>, + + pub request_ulid: Ulid, + + /// Size of the JSON request. Does not include headers or things like that. 
+ pub request_bytes: usize, + + /// users can opt out of method tracking for their personal dashboads + /// but we still have to store the method at least temporarily for cost calculations + pub method: Option, + + /// Instant that the request was received (or at least close to it) + /// We use Instant and not timestamps to avoid problems with leap seconds and similar issues + pub start_instant: tokio::time::Instant, /// if this is empty, there was a cache_hit - pub backend_requests: Mutex>>, + /// otherwise, it is populated with any rpc servers that were used by this request + pub backend_requests: BackendRequests, + /// The number of times the request got stuck waiting because no servers were synced pub no_servers: AtomicU64, + /// If handling the request hit an application error + /// This does not count things like a transcation reverting or a malformed request pub error_response: AtomicBool, + /// Size in bytes of the JSON response. Does not include headers or things like that. pub response_bytes: AtomicU64, + /// How many milliseconds it took to respond to the request pub response_millis: AtomicU64, + /// What time the (first) response was proxied. + /// TODO: think about how to store response times for ProxyMode::Versus + pub response_timestamp: AtomicI64, + /// True if the response required querying a backup RPC + /// RPC aggregators that query multiple providers to compare response may use this header to ignore our response. pub response_from_backup_rpc: AtomicBool, + + /// ProxyMode::Debug logs requests and responses with Kafka + /// TODO: maybe this shouldn't be determined by ProxyMode. A request param should probably enable this + pub kafka_debug_logger: Option>, + + /// Channel to send stats to + pub stat_sender: Option>, +} + +impl Default for RequestMetadata { + fn default() -> Self { + Self { + archive_request: Default::default(), + authorization: Default::default(), + backend_requests: Default::default(), + error_response: Default::default(), + kafka_debug_logger: Default::default(), + method: Default::default(), + no_servers: Default::default(), + request_bytes: Default::default(), + request_ulid: Default::default(), + response_bytes: Default::default(), + response_from_backup_rpc: Default::default(), + response_millis: Default::default(), + response_timestamp: Default::default(), + start_instant: Instant::now(), + stat_sender: Default::default(), + } + } +} + +#[derive(From)] +pub enum RequestOrMethod<'a> { + Request(&'a JsonRpcRequest), + /// jsonrpc method (or similar label) and the size that the request should count as (sometimes 0) + Method(&'a str, usize), + RequestSize(usize), +} + +impl<'a> RequestOrMethod<'a> { + fn method(&self) -> Option<&str> { + match self { + Self::Request(x) => Some(&x.method), + Self::Method(x, _) => Some(x), + _ => None, + } + } + + fn jsonrpc_request(&self) -> Option<&JsonRpcRequest> { + match self { + Self::Request(x) => Some(x), + _ => None, + } + } + + fn num_bytes(&self) -> usize { + match self { + RequestOrMethod::Method(_, num_bytes) => *num_bytes, + RequestOrMethod::Request(x) => x.num_bytes(), + RequestOrMethod::RequestSize(num_bytes) => *num_bytes, + } + } +} + +impl<'a> From<&'a str> for RequestOrMethod<'a> { + fn from(value: &'a str) -> Self { + if value.is_empty() { + Self::RequestSize(0) + } else { + Self::Method(value, 0) + } + } +} + +// TODO: i think a trait is actually the right thing to use here +#[derive(From)] +pub enum ResponseOrBytes<'a> { + Json(&'a serde_json::Value), + Response(&'a JsonRpcForwardedResponse), + 
Bytes(usize), +} + +impl<'a> From for ResponseOrBytes<'a> { + fn from(value: u64) -> Self { + Self::Bytes(value as usize) + } +} + +impl ResponseOrBytes<'_> { + pub fn num_bytes(&self) -> usize { + match self { + Self::Json(x) => serde_json::to_string(x) + .expect("this should always serialize") + .len(), + Self::Response(x) => x.num_bytes(), + Self::Bytes(num_bytes) => *num_bytes, + } + } } impl RequestMetadata { - pub fn new(request_bytes: usize) -> Self { - // TODO: how can we do this without turning it into a string first. this is going to slow us down! - let request_bytes = request_bytes as u64; + pub async fn new<'a, R: Into>>( + app: &Web3ProxyApp, + authorization: Arc, + request: R, + head_block_num: Option<&U64>, + ) -> Arc { + let request = request.into(); - Self { - start_instant: Instant::now(), - request_bytes, + let method = request.method().map(|x| x.to_string()); + + let request_bytes = request.num_bytes(); + + // TODO: modify the request here? I don't really like that very much. but its a sure way to get archive_request set correctly + + // TODO: add the Ulid at the haproxy or amazon load balancer level? investigate OpenTelemetry + let request_ulid = Ulid::new(); + + let kafka_debug_logger = if matches!(authorization.checks.proxy_mode, ProxyMode::Debug) { + KafkaDebugLogger::try_new( + app, + authorization.clone(), + head_block_num, + "web3_proxy:rpc", + request_ulid, + ) + } else { + None + }; + + if let Some(ref kafka_debug_logger) = kafka_debug_logger { + if let Some(request) = request.jsonrpc_request() { + // TODO: channels might be more ergonomic than spawned futures + // spawned things run in parallel easier but generally need more Arcs + kafka_debug_logger.log_debug_request(request); + } else { + // there probably isn't a new request attached to this metadata. + // this happens with websocket subscriptions + } + } + + let x = Self { archive_request: false.into(), backend_requests: Default::default(), - no_servers: 0.into(), error_response: false.into(), + kafka_debug_logger, + no_servers: 0.into(), + authorization: Some(authorization), + request_bytes, + method, response_bytes: 0.into(), - response_millis: 0.into(), response_from_backup_rpc: false.into(), + response_millis: 0.into(), + request_ulid, + response_timestamp: 0.into(), + start_instant: Instant::now(), + stat_sender: app.stat_sender.clone(), + }; + + Arc::new(x) + } + + pub fn backend_rpcs_used(&self) -> Vec> { + self.backend_requests.lock().clone() + } + + pub fn tracking_level(&self) -> TrackingLevel { + if let Some(authorization) = self.authorization.as_ref() { + authorization.checks.tracking_level.clone() + } else { + TrackingLevel::None + } + } + + pub fn opt_in_method(&self) -> Option { + match self.tracking_level() { + TrackingLevel::None | TrackingLevel::Aggregated => None, + TrackingLevel::Detailed => self.method.clone(), + } + } + + pub fn take_opt_in_method(&mut self) -> Option { + match self.tracking_level() { + TrackingLevel::None | TrackingLevel::Aggregated => None, + TrackingLevel::Detailed => self.method.take(), + } + } + + pub fn try_send_stat(mut self) -> Web3ProxyResult> { + if let Some(stat_sender) = self.stat_sender.take() { + trace!("sending stat! {:?}", self); + + let stat: RpcQueryStats = self.try_into()?; + + let stat: AppStat = stat.into(); + + if let Err(err) = stat_sender.send(stat) { + error!("failed sending stats for {:?}: {:?}", err.0, err); + // TODO: return it? 
that seems like it might cause an infinite loop + }; + + Ok(None) + } else { + Ok(Some(self)) + } + } + + pub fn add_response<'a, R: Into>>(&'a self, response: R) { + // TODO: fetch? set? should it be None in a Mutex? or a OnceCell? + let response = response.into(); + + let num_bytes = response.num_bytes() as u64; + + self.response_bytes + .fetch_add(num_bytes, atomic::Ordering::AcqRel); + + self.response_millis.fetch_add( + self.start_instant.elapsed().as_millis() as u64, + atomic::Ordering::AcqRel, + ); + + // TODO: record first or last timestamp? really, we need multiple + self.response_timestamp + .store(Utc::now().timestamp(), atomic::Ordering::Release); + + if let Some(kafka_debug_logger) = self.kafka_debug_logger.as_ref() { + if let ResponseOrBytes::Response(response) = response { + kafka_debug_logger.log_debug_response(response); + } + } + } + + pub fn try_send_arc_stat(self: Arc) -> anyhow::Result>> { + match Arc::try_unwrap(self) { + Ok(x) => { + let not_sent = x.try_send_stat()?.map(Arc::new); + Ok(not_sent) + } + Err(not_sent) => { + trace!( + "could not send stat while {} arcs are active", + Arc::strong_count(¬_sent) + ); + Ok(Some(not_sent)) + } + } + } + + // TODO: helper function to duplicate? needs to clear request_bytes, and all the atomics tho... +} + +// TODO: is this where the panic comes from? +impl Drop for RequestMetadata { + fn drop(&mut self) { + if self.stat_sender.is_some() { + // turn `&mut self` into `self` + let x = mem::take(self); + + // warn!("request metadata dropped without stat send! {:?}", self); + let _ = x.try_send_stat(); } } } @@ -445,11 +865,11 @@ pub async fn key_is_authorized( impl Web3ProxyApp { /// Limit the number of concurrent requests from the given ip address. - pub async fn ip_semaphore(&self, ip: IpAddr) -> Web3ProxyResult> { + pub async fn ip_semaphore(&self, ip: &IpAddr) -> Web3ProxyResult> { if let Some(max_concurrent_requests) = self.config.public_max_concurrent_requests { let semaphore = self .ip_semaphores - .get_with(ip, async move { + .get_with_by_ref(ip, async move { // TODO: set max_concurrent_requests dynamically based on load? let s = Semaphore::new(max_concurrent_requests); Arc::new(s) @@ -516,7 +936,7 @@ impl Web3ProxyApp { // limit concurrent requests let semaphore = self .bearer_token_semaphores - .get_with(user_bearer_token.clone(), async move { + .get_with_by_ref(&user_bearer_token, async move { let s = Semaphore::new(self.config.bearer_token_max_concurrent_requests as usize); Arc::new(s) }) @@ -623,7 +1043,7 @@ impl Web3ProxyApp { { Ok(DeferredRateLimitResult::Allowed) => { // rate limit allowed us. check concurrent request limits - let semaphore = self.ip_semaphore(ip).await?; + let semaphore = self.ip_semaphore(&ip).await?; Ok(RateLimitResult::Allowed(authorization, semaphore)) } @@ -643,14 +1063,14 @@ impl Web3ProxyApp { error!("rate limiter is unhappy. allowing ip. err={:?}", err); // at least we can still check the semaphore - let semaphore = self.ip_semaphore(ip).await?; + let semaphore = self.ip_semaphore(&ip).await?; Ok(RateLimitResult::Allowed(authorization, semaphore)) } } } else { // no redis, but we can still check the ip semaphore - let semaphore = self.ip_semaphore(ip).await?; + let semaphore = self.ip_semaphore(&ip).await?; // TODO: if no redis, rate limit with a local cache? "warn!" 
probably isn't right Ok(RateLimitResult::Allowed(authorization, semaphore)) diff --git a/web3_proxy/src/frontend/errors.rs b/web3_proxy/src/frontend/errors.rs index 1785975d..b7d1a669 100644 --- a/web3_proxy/src/frontend/errors.rs +++ b/web3_proxy/src/frontend/errors.rs @@ -25,6 +25,12 @@ pub type Web3ProxyResult = Result; // TODO: take "IntoResponse" instead of Response? pub type Web3ProxyResponse = Web3ProxyResult; +impl From for Web3ProxyResult<()> { + fn from(value: Web3ProxyError) -> Self { + Err(value) + } +} + // TODO: #[derive(Debug, Display, Error, From)] pub enum Web3ProxyError { @@ -35,6 +41,9 @@ pub enum Web3ProxyError { #[error(ignore)] #[from(ignore)] BadRequest(String), + #[error(ignore)] + #[from(ignore)] + BadResponse(String), BadRouting, Database(DbErr), #[display(fmt = "{:#?}, {:#?}", _0, _1)] @@ -168,6 +177,18 @@ impl Web3ProxyError { ), ) } + Self::BadResponse(err) => { + // TODO: think about this one more. ankr gives us this because ethers fails to parse responses without an id + debug!("BAD_RESPONSE: {}", err); + ( + StatusCode::INTERNAL_SERVER_ERROR, + JsonRpcForwardedResponse::from_str( + &format!("bad response: {}", err), + Some(StatusCode::INTERNAL_SERVER_ERROR.as_u16().into()), + None, + ), + ) + } Self::BadRouting => { error!("BadRouting"); ( diff --git a/web3_proxy/src/frontend/mod.rs b/web3_proxy/src/frontend/mod.rs index 9715c111..728978da 100644 --- a/web3_proxy/src/frontend/mod.rs +++ b/web3_proxy/src/frontend/mod.rs @@ -17,6 +17,7 @@ use axum::{ Extension, Router, }; use http::header::AUTHORIZATION; +use listenfd::ListenFd; use log::info; use moka::future::Cache; use std::net::SocketAddr; @@ -45,6 +46,7 @@ pub async fn serve( ) -> anyhow::Result<()> { // setup caches for whatever the frontend needs // no need for max items since it is limited by the enum key + // TODO: latest moka allows for different ttls for different let json_response_cache: FrontendJsonResponseCache = Cache::builder() .time_to_live(Duration::from_secs(2)) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); @@ -62,102 +64,77 @@ pub async fn serve( // // HTTP RPC (POST) // + // Websocket RPC (GET) + // If not an RPC, GET will redirect to urls in the config + // // public - .route("/", post(rpc_proxy_http::proxy_web3_rpc)) + .route( + "/", + post(rpc_proxy_http::proxy_web3_rpc).get(rpc_proxy_ws::websocket_handler), + ) // authenticated with and without trailing slash .route( "/rpc/:rpc_key/", - post(rpc_proxy_http::proxy_web3_rpc_with_key), + post(rpc_proxy_http::proxy_web3_rpc_with_key) + .get(rpc_proxy_ws::websocket_handler_with_key), ) .route( "/rpc/:rpc_key", - post(rpc_proxy_http::proxy_web3_rpc_with_key), + post(rpc_proxy_http::proxy_web3_rpc_with_key) + .get(rpc_proxy_ws::websocket_handler_with_key), ) // authenticated debug route with and without trailing slash .route( "/debug/:rpc_key/", - post(rpc_proxy_http::debug_proxy_web3_rpc_with_key), + post(rpc_proxy_http::debug_proxy_web3_rpc_with_key) + .get(rpc_proxy_ws::debug_websocket_handler_with_key), ) .route( "/debug/:rpc_key", - post(rpc_proxy_http::debug_proxy_web3_rpc_with_key), + post(rpc_proxy_http::debug_proxy_web3_rpc_with_key) + .get(rpc_proxy_ws::debug_websocket_handler_with_key), ) // public fastest with and without trailing slash - .route("/fastest/", post(rpc_proxy_http::fastest_proxy_web3_rpc)) - .route("/fastest", post(rpc_proxy_http::fastest_proxy_web3_rpc)) + .route( + "/fastest/", + post(rpc_proxy_http::fastest_proxy_web3_rpc) + .get(rpc_proxy_ws::fastest_websocket_handler), + ) + .route( + 
"/fastest", + post(rpc_proxy_http::fastest_proxy_web3_rpc) + .get(rpc_proxy_ws::fastest_websocket_handler), + ) // authenticated fastest with and without trailing slash .route( "/fastest/:rpc_key/", - post(rpc_proxy_http::fastest_proxy_web3_rpc_with_key), + post(rpc_proxy_http::fastest_proxy_web3_rpc_with_key) + .get(rpc_proxy_ws::fastest_websocket_handler_with_key), ) .route( "/fastest/:rpc_key", - post(rpc_proxy_http::fastest_proxy_web3_rpc_with_key), - ) - // public versus - .route("/versus/", post(rpc_proxy_http::versus_proxy_web3_rpc)) - .route("/versus", post(rpc_proxy_http::versus_proxy_web3_rpc)) - // authenticated versus with and without trailing slash - .route( - "/versus/:rpc_key/", - post(rpc_proxy_http::versus_proxy_web3_rpc_with_key), - ) - .route( - "/versus/:rpc_key", - post(rpc_proxy_http::versus_proxy_web3_rpc_with_key), - ) - // - // Websocket RPC (GET) - // If not an RPC, this will redirect to configurable urls - // - // public - .route("/", get(rpc_proxy_ws::websocket_handler)) - // authenticated with and without trailing slash - .route( - "/rpc/:rpc_key/", - get(rpc_proxy_ws::websocket_handler_with_key), - ) - .route( - "/rpc/:rpc_key", - get(rpc_proxy_ws::websocket_handler_with_key), - ) - // debug with and without trailing slash - .route( - "/debug/:rpc_key/", - get(rpc_proxy_ws::websocket_handler_with_key), - ) - .route( - "/debug/:rpc_key", - get(rpc_proxy_ws::websocket_handler_with_key), - ) // public fastest with and without trailing slash - .route("/fastest/", get(rpc_proxy_ws::fastest_websocket_handler)) - .route("/fastest", get(rpc_proxy_ws::fastest_websocket_handler)) - // authenticated fastest with and without trailing slash - .route( - "/fastest/:rpc_key/", - get(rpc_proxy_ws::fastest_websocket_handler_with_key), - ) - .route( - "/fastest/:rpc_key", - get(rpc_proxy_ws::fastest_websocket_handler_with_key), + post(rpc_proxy_http::fastest_proxy_web3_rpc_with_key) + .get(rpc_proxy_ws::fastest_websocket_handler_with_key), ) // public versus .route( "/versus/", - get(rpc_proxy_ws::versus_websocket_handler_with_key), + post(rpc_proxy_http::versus_proxy_web3_rpc).get(rpc_proxy_ws::versus_websocket_handler), ) .route( "/versus", - get(rpc_proxy_ws::versus_websocket_handler_with_key), + post(rpc_proxy_http::versus_proxy_web3_rpc).get(rpc_proxy_ws::versus_websocket_handler), ) // authenticated versus with and without trailing slash .route( "/versus/:rpc_key/", - get(rpc_proxy_ws::versus_websocket_handler_with_key), + post(rpc_proxy_http::versus_proxy_web3_rpc_with_key) + .get(rpc_proxy_ws::versus_websocket_handler_with_key), ) .route( "/versus/:rpc_key", - get(rpc_proxy_ws::versus_websocket_handler_with_key), + post(rpc_proxy_http::versus_proxy_web3_rpc_with_key) + .get(rpc_proxy_ws::versus_websocket_handler_with_key), ) // // System things @@ -241,19 +218,29 @@ pub async fn serve( // handle cors .layer(CorsLayer::very_permissive()) // application state - .layer(Extension(proxy_app.clone())) + .layer(Extension(proxy_app)) // frontend caches .layer(Extension(json_response_cache)) .layer(Extension(health_cache)) // 404 for any unknown routes .fallback(errors::handler_404); - // run our app with hyper - // TODO: allow only listening on localhost? top_config.app.host.parse()? - let addr = SocketAddr::from(([0, 0, 0, 0], port)); - info!("listening on port {}", port); + let server_builder = if let Some(listener) = ListenFd::from_env().take_tcp_listener(0)? 
{ + // use systemd socket magic for no downtime deploys + let addr = listener.local_addr()?; - // TODO: into_make_service is enough if we always run behind a proxy. make into_make_service_with_connect_info optional? + info!("listening with fd at {}", addr); + + axum::Server::from_tcp(listener)? + } else { + info!("listening on port {}", port); + // TODO: allow only listening on localhost? top_config.app.host.parse()? + let addr = SocketAddr::from(([0, 0, 0, 0], port)); + + axum::Server::try_bind(&addr)? + }; + + // into_make_service is enough if we always run behind a proxy /* It sequentially looks for an IP in: - x-forwarded-for header (de-facto standard) @@ -261,12 +248,21 @@ pub async fn serve( - forwarded header (new standard) - axum::extract::ConnectInfo (if not behind proxy) */ - let service = app.into_make_service_with_connect_info::(); + #[cfg(feature = "connectinfo")] + let make_service = { + info!("connectinfo feature enabled"); + app.into_make_service_with_connect_info::() + }; - // `axum::Server` is a re-export of `hyper::Server` - let server = axum::Server::bind(&addr) + #[cfg(not(feature = "connectinfo"))] + let make_service = { + info!("connectinfo feature disabled"); + app.into_make_service() + }; + + let server = server_builder + .serve(make_service) // TODO: option to use with_connect_info. we want it in dev, but not when running behind a proxy, but not - .serve(service) .with_graceful_shutdown(async move { let _ = shutdown_receiver.recv().await; }) diff --git a/web3_proxy/src/frontend/rpc_proxy_http.rs b/web3_proxy/src/frontend/rpc_proxy_http.rs index 2d938adb..06b55603 100644 --- a/web3_proxy/src/frontend/rpc_proxy_http.rs +++ b/web3_proxy/src/frontend/rpc_proxy_http.rs @@ -63,12 +63,12 @@ async fn _proxy_web3_rpc( let authorization = Arc::new(authorization); - let (response, rpcs, _semaphore) = app + let (status_code, response, rpcs, _semaphore) = app .proxy_web3_rpc(authorization, payload) .await - .map(|(x, y)| (x, y, semaphore))?; + .map(|(s, x, y)| (s, x, y, semaphore))?; - let mut response = Json(&response).into_response(); + let mut response = (status_code, Json(response)).into_response(); let headers = response.headers_mut(); @@ -129,6 +129,8 @@ pub async fn proxy_web3_rpc_with_key( .await } +// TODO: if a /debug/ request gets rejected by an invalid request, there won't be any kafka log +// TODO: #[debug_handler] pub async fn debug_proxy_web3_rpc_with_key( Extension(app): Extension>, @@ -228,12 +230,12 @@ async fn _proxy_web3_rpc_with_key( let rpc_secret_key_id = authorization.checks.rpc_secret_key_id; - let (response, rpcs, _semaphore) = app + let (status_code, response, rpcs, _semaphore) = app .proxy_web3_rpc(authorization, payload) .await - .map(|(x, y)| (x, y, semaphore))?; + .map(|(s, x, y)| (s, x, y, semaphore))?; - let mut response = Json(&response).into_response(); + let mut response = (status_code, Json(response)).into_response(); let headers = response.headers_mut(); diff --git a/web3_proxy/src/frontend/rpc_proxy_ws.rs b/web3_proxy/src/frontend/rpc_proxy_ws.rs index dd04f0dc..e0522b00 100644 --- a/web3_proxy/src/frontend/rpc_proxy_ws.rs +++ b/web3_proxy/src/frontend/rpc_proxy_ws.rs @@ -5,12 +5,12 @@ use super::authorization::{ip_is_authorized, key_is_authorized, Authorization, RequestMetadata}; use super::errors::{Web3ProxyError, Web3ProxyResponse}; use crate::jsonrpc::JsonRpcId; -use crate::stats::RpcQueryStats; use crate::{ app::Web3ProxyApp, frontend::errors::Web3ProxyResult, jsonrpc::{JsonRpcForwardedResponse, JsonRpcForwardedResponseEnum, 
JsonRpcRequest}, }; +use anyhow::Context; use axum::headers::{Origin, Referer, UserAgent}; use axum::{ extract::ws::{Message, WebSocket, WebSocketUpgrade}, @@ -20,6 +20,8 @@ use axum::{ }; use axum_client_ip::InsecureClientIp; use axum_macros::debug_handler; +use ethers::types::Bytes; +use fstrings::{f, format_args_f}; use futures::SinkExt; use futures::{ future::AbortHandle, @@ -28,12 +30,13 @@ use futures::{ use handlebars::Handlebars; use hashbrown::HashMap; use http::StatusCode; -use log::{info, trace, warn}; +use log::{info, trace}; use serde_json::json; use std::sync::Arc; use std::{str::from_utf8_mut, sync::atomic::AtomicUsize}; use tokio::sync::{broadcast, OwnedSemaphorePermit, RwLock}; +/// How to select backend servers for a request #[derive(Copy, Clone, Debug)] pub enum ProxyMode { /// send to the "best" synced server @@ -43,6 +46,7 @@ pub enum ProxyMode { /// send to all servers for benchmarking. return the fastest non-error response Versus, /// send all requests and responses to kafka + /// TODO: should this be seperate from best/fastest/versus? Debug, } @@ -314,14 +318,15 @@ async fn proxy_web3_socket( } /// websockets support a few more methods than http clients +/// TODO: i think this subscriptions hashmap grows unbounded async fn handle_socket_payload( app: Arc, authorization: &Arc, payload: &str, response_sender: &flume::Sender, subscription_count: &AtomicUsize, - subscriptions: Arc>>, -) -> (Message, Option) { + subscriptions: Arc>>, +) -> Web3ProxyResult<(Message, Option)> { let (authorization, semaphore) = match authorization.check_again(&app).await { Ok((a, s)) => (a, s), Err(err) => { @@ -329,7 +334,7 @@ async fn handle_socket_payload( let err = serde_json::to_string(&err).expect("to_string should always work here"); - return (Message::Text(err), None); + return Ok((Message::Text(err), None)); } }; @@ -338,84 +343,90 @@ async fn handle_socket_payload( Ok(json_request) => { let id = json_request.id.clone(); - let response: Web3ProxyResult = match &json_request.method - [..] - { - "eth_subscribe" => { - // TODO: how can we subscribe with proxy_mode? - match app - .eth_subscribe( - authorization.clone(), - json_request, - subscription_count, - response_sender.clone(), - ) + // TODO: move this to a seperate function so we can use the try operator + let response: Web3ProxyResult = + match &json_request.method[..] { + "eth_subscribe" => { + // TODO: how can we subscribe with proxy_mode? + match app + .eth_subscribe( + authorization.clone(), + json_request, + subscription_count, + response_sender.clone(), + ) + .await + { + Ok((handle, response)) => { + { + let mut x = subscriptions.write().await; + + let result: &serde_json::value::RawValue = response + .result + .as_ref() + .context("there should be a result here")?; + + // TODO: there must be a better way to do this + let k: Bytes = serde_json::from_str(result.get()) + .context("subscription ids must be bytes")?; + + x.insert(k, handle); + }; + + Ok(response.into()) + } + Err(err) => Err(err), + } + } + "eth_unsubscribe" => { + let request_metadata = + RequestMetadata::new(&app, authorization.clone(), &json_request, None) + .await; + + #[derive(serde::Deserialize)] + struct EthUnsubscribeParams([Bytes; 1]); + + if let Some(params) = json_request.params { + match serde_json::from_value(params) { + Ok::(params) => { + let subscription_id = ¶ms.0[0]; + + // TODO: is this the right response? 
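The eth_subscribe arm above now keys the subscriptions map by ethers' Bytes rather than the response's raw JSON string, which assumes the subscription id is a hex string. A sketch of just that key parsing, using a placeholder function name:

    use anyhow::Context;
    use ethers::types::Bytes;
    use serde_json::value::RawValue;

    fn subscription_key(result: &RawValue) -> anyhow::Result<Bytes> {
        // an eth_subscribe result looks like "0x9cef478923ff08bf67fde6c64013158d";
        // RawValue::get() returns the raw JSON (quotes included), which serde can
        // deserialize straight into Bytes, giving a compact binary map key
        serde_json::from_str(result.get()).context("subscription ids must be bytes")
    }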
+ let partial_response = { + let mut x = subscriptions.write().await; + match x.remove(subscription_id) { + None => false, + Some(handle) => { + handle.abort(); + true + } + } + }; + + let response = JsonRpcForwardedResponse::from_value( + json!(partial_response), + id.clone(), + ); + + request_metadata.add_response(&response); + + Ok(response.into()) + } + Err(err) => Err(Web3ProxyError::BadRequest(f!( + "incorrect params given for eth_unsubscribe. {err:?}" + ))), + } + } else { + Err(Web3ProxyError::BadRequest( + "no params given for eth_unsubscribe".to_string(), + )) + } + } + _ => app + .proxy_web3_rpc(authorization.clone(), json_request.into()) .await - { - Ok((handle, response)) => { - // TODO: better key - let mut x = subscriptions.write().await; - - x.insert( - response - .result - .as_ref() - // TODO: what if there is an error? - .expect("response should always have a result, not an error") - .to_string(), - handle, - ); - - Ok(response.into()) - } - Err(err) => Err(err), - } - } - "eth_unsubscribe" => { - // TODO: move this logic into the app? - let request_bytes = json_request.num_bytes(); - - let request_metadata = Arc::new(RequestMetadata::new(request_bytes)); - - let subscription_id = json_request.params.unwrap().to_string(); - - let mut x = subscriptions.write().await; - - // TODO: is this the right response? - let partial_response = match x.remove(&subscription_id) { - None => false, - Some(handle) => { - handle.abort(); - true - } - }; - - drop(x); - - let response = - JsonRpcForwardedResponse::from_value(json!(partial_response), id.clone()); - - if let Some(stat_sender) = app.stat_sender.as_ref() { - let response_stat = RpcQueryStats::new( - Some(json_request.method.clone()), - authorization.clone(), - request_metadata, - response.num_bytes(), - ); - - if let Err(err) = stat_sender.send_async(response_stat.into()).await { - // TODO: what should we do? - warn!("stat_sender failed during eth_unsubscribe: {:?}", err); - } - } - - Ok(response.into()) - } - _ => app - .proxy_web3_rpc(authorization.clone(), json_request.into()) - .await - .map(|(response, _)| response), - }; + .map(|(status_code, response, _)| response), + }; (id, response) } @@ -434,7 +445,7 @@ async fn handle_socket_payload( } }; - (Message::Text(response_str), semaphore) + Ok((Message::Text(response_str), semaphore)) } async fn read_web3_socket( @@ -443,7 +454,7 @@ async fn read_web3_socket( mut ws_rx: SplitStream, response_sender: flume::Sender, ) { - // TODO: need a concurrent hashmap + // RwLock should be fine here. a user isn't going to be opening tons of subscriptions let subscriptions = Arc::new(RwLock::new(HashMap::new())); let subscription_count = Arc::new(AtomicUsize::new(1)); @@ -467,16 +478,17 @@ async fn read_web3_socket( // new message from our client. forward to a backend and then send it through response_tx let response_msg = match msg { - Message::Text(payload) => { + Message::Text(ref payload) => { + // TODO: do not unwrap! let (msg, s) = handle_socket_payload( app.clone(), &authorization, - &payload, + payload, &response_sender, &subscription_count, subscriptions, ) - .await; + .await.unwrap(); _semaphore = s; @@ -499,6 +511,7 @@ async fn read_web3_socket( Message::Binary(mut payload) => { let payload = from_utf8_mut(&mut payload).unwrap(); + // TODO: do not unwrap! 
let (msg, s) = handle_socket_payload( app.clone(), &authorization, @@ -507,7 +520,7 @@ async fn read_web3_socket( &subscription_count, subscriptions, ) - .await; + .await.unwrap(); _semaphore = s; diff --git a/web3_proxy/src/frontend/status.rs b/web3_proxy/src/frontend/status.rs index 970ad551..0e46c21a 100644 --- a/web3_proxy/src/frontend/status.rs +++ b/web3_proxy/src/frontend/status.rs @@ -30,9 +30,13 @@ pub async fn health( /// Easy alerting if backup servers are in use. pub async fn backups_needed(Extension(app): Extension>) -> impl IntoResponse { let code = { - let consensus_rpcs = app.balanced_rpcs.watch_consensus_rpcs_sender.borrow(); + let consensus_rpcs = app + .balanced_rpcs + .watch_consensus_rpcs_sender + .borrow() + .clone(); - if let Some(consensus_rpcs) = consensus_rpcs.as_ref() { + if let Some(ref consensus_rpcs) = consensus_rpcs { if consensus_rpcs.backups_needed { StatusCode::INTERNAL_SERVER_ERROR } else { diff --git a/web3_proxy/src/frontend/users/authentication.rs b/web3_proxy/src/frontend/users/authentication.rs index f70b63e9..e681ea41 100644 --- a/web3_proxy/src/frontend/users/authentication.rs +++ b/web3_proxy/src/frontend/users/authentication.rs @@ -284,7 +284,7 @@ pub async fn user_login_post( let rpc_secret_key = RpcSecretKey::new(); let user_rpc_key = rpc_key::ActiveModel { - user_id: sea_orm::Set(caller.id.clone()), + user_id: sea_orm::Set(caller.id), secret_key: sea_orm::Set(rpc_secret_key.into()), description: sea_orm::Set(None), ..Default::default() @@ -297,7 +297,7 @@ pub async fn user_login_post( // We should also create the balance entry ... let user_balance = balance::ActiveModel { - user_id: sea_orm::Set(caller.id.clone()), + user_id: sea_orm::Set(caller.id), available_balance: sea_orm::Set(Decimal::new(0, 0)), used_balance: sea_orm::Set(Decimal::new(0, 0)), ..Default::default() diff --git a/web3_proxy/src/frontend/users/payment.rs b/web3_proxy/src/frontend/users/payment.rs index 9fbf7daa..b49c3705 100644 --- a/web3_proxy/src/frontend/users/payment.rs +++ b/web3_proxy/src/frontend/users/payment.rs @@ -321,7 +321,7 @@ pub async fn user_balance_post( } // Get the topics out - let topic: H256 = H256::from(log.topics.get(0).unwrap().to_owned()); + let topic: H256 = log.topics.get(0).unwrap().to_owned(); if topic != deposit_topic { debug!( "Out: Topic is not relevant: {:?} {:?}", @@ -489,8 +489,9 @@ pub async fn user_balance_post( })), ) .into_response(); + // Return early if the log was added, assume there is at most one valid log per transaction - return Ok(response.into()); + return Ok(response); } Err(Web3ProxyError::BadRequest( diff --git a/web3_proxy/src/frontend/users/referral.rs b/web3_proxy/src/frontend/users/referral.rs index ac4649d0..c4f613ac 100644 --- a/web3_proxy/src/frontend/users/referral.rs +++ b/web3_proxy/src/frontend/users/referral.rs @@ -19,7 +19,6 @@ use migration::sea_orm::ActiveModelTrait; use migration::sea_orm::ColumnTrait; use migration::sea_orm::EntityTrait; use migration::sea_orm::QueryFilter; -use migration::sea_orm::TransactionTrait; use serde_json::json; use std::sync::Arc; @@ -49,7 +48,7 @@ pub async fn user_referral_link_get( warn!("User tier is: {:?}", user_tier); // TODO: This shouldn't be hardcoded. Also, it should be an enum, not sth like this ... 
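In the backups_needed handler above, cloning out of watch_consensus_rpcs_sender.borrow() releases the watch read guard immediately instead of holding it for the rest of the function; long-lived borrows on a tokio watch channel can block the sender. A sketch of the pattern:

    use tokio::sync::watch;

    fn snapshot(receiver: &watch::Receiver<Option<String>>) -> Option<String> {
        // borrow() hands back a guard that holds a read lock on the channel's value;
        // clone the contents so the guard is dropped at the end of this expression
        receiver.borrow().clone()
    }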
if user_tier.id != 6 { - return Err(Web3ProxyError::PaymentRequired.into()); + return Err(Web3ProxyError::PaymentRequired); } // Then get the referral token diff --git a/web3_proxy/src/frontend/users/subuser.rs b/web3_proxy/src/frontend/users/subuser.rs index c1d4eb3b..e382da73 100644 --- a/web3_proxy/src/frontend/users/subuser.rs +++ b/web3_proxy/src/frontend/users/subuser.rs @@ -49,7 +49,7 @@ pub async fn get_keys_as_subuser( .all(db_replica.conn()) .await? .into_iter() - .map(|x| (x.rpc_secret_key_id.clone(), x)) + .map(|x| (x.rpc_secret_key_id, x)) .collect::>(); // Now return a list of all subusers (their wallets) @@ -147,7 +147,7 @@ pub async fn get_subusers( .all(db_replica.conn()) .await? .into_iter() - .map(|x| (x.user_id.clone(), x)) + .map(|x| (x.user_id, x)) .collect::>(); // Now return a list of all subusers (their wallets) @@ -314,7 +314,7 @@ pub async fn modify_subuser( let rpc_secret_key = RpcSecretKey::new(); let subuser_rpc_key = rpc_key::ActiveModel { - user_id: sea_orm::Set(subuser.id.clone()), + user_id: sea_orm::Set(subuser.id), secret_key: sea_orm::Set(rpc_secret_key.into()), description: sea_orm::Set(None), ..Default::default() @@ -327,7 +327,7 @@ pub async fn modify_subuser( // We should also create the balance entry ... let subuser_balance = balance::ActiveModel { - user_id: sea_orm::Set(subuser.id.clone()), + user_id: sea_orm::Set(subuser.id), available_balance: sea_orm::Set(Decimal::new(0, 0)), used_balance: sea_orm::Set(Decimal::new(0, 0)), ..Default::default() @@ -374,7 +374,8 @@ pub async fn modify_subuser( let txn = db_conn.begin().await?; let mut action = "no action"; - let _ = match subuser_entry_secondary_user { + + match subuser_entry_secondary_user { Some(secondary_user) => { // In this case, remove the subuser let mut active_subuser_entry_secondary_user = secondary_user.into_active_model(); @@ -421,6 +422,7 @@ pub async fn modify_subuser( })), ) .into_response(); + // Return early if the log was added, assume there is at most one valid log per transaction - Ok(response.into()) + Ok(response) } diff --git a/web3_proxy/src/jsonrpc.rs b/web3_proxy/src/jsonrpc.rs index 78c76b0e..eabd3d0b 100644 --- a/web3_proxy/src/jsonrpc.rs +++ b/web3_proxy/src/jsonrpc.rs @@ -290,9 +290,12 @@ impl JsonRpcForwardedResponse { data = err.data.clone(); } else if let Some(err) = err.as_serde_error() { // this is not an rpc error. keep it as an error - return Err(Web3ProxyError::BadRequest(format!("bad request: {}", err))); + return Err(Web3ProxyError::BadResponse(format!( + "bad response: {}", + err + ))); } else { - return Err(anyhow::anyhow!("unexpected ethers error!").into()); + return Err(anyhow::anyhow!("unexpected ethers error! {:?}", err).into()); } } e => return Err(e.into()), diff --git a/web3_proxy/src/rpcs/blockchain.rs b/web3_proxy/src/rpcs/blockchain.rs index c7f641e3..3d6ac3dd 100644 --- a/web3_proxy/src/rpcs/blockchain.rs +++ b/web3_proxy/src/rpcs/blockchain.rs @@ -169,7 +169,6 @@ impl Web3Rpcs { // TODO: i think we can rearrange this function to make it faster on the hot path let block_hash = block.hash(); - // skip Block::default() if block_hash.is_zero() { debug!("Skipping block without hash!"); return Ok(block); @@ -189,7 +188,7 @@ impl Web3Rpcs { // TODO: use their get_with let block = self .blocks_by_hash - .get_with(*block_hash, async move { block.clone() }) + .get_with(*block_hash, async move { block }) .await; Ok(block) @@ -236,7 +235,7 @@ impl Web3Rpcs { None => { // TODO: helper for method+params => JsonRpcRequest // TODO: does this id matter? 
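The blocks_by_hash.get_with(...) call above (and the get_with_by_ref calls elsewhere in this patch) return the cached value when the key is present and otherwise run the init future, with concurrent callers for the same key sharing a single initialization; the *_by_ref variants also avoid cloning the key up front. A minimal sketch with an assumed string-keyed cache:

    use moka::future::Cache;

    async fn cached_lookup(cache: &Cache<String, u64>) -> u64 {
        // if "head" is already cached this returns immediately; otherwise the
        // async block runs once and every concurrent caller gets its result
        cache
            .get_with_by_ref("head", async {
                // stand-in for the real (expensive) lookup
                42
            })
            .await
    }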
- let request = json!({ "id": "1", "method": "eth_getBlockByHash", "params": get_block_params }); + let request = json!({ "jsonrpc": "2.0", "id": "1", "method": "eth_getBlockByHash", "params": get_block_params }); let request: JsonRpcRequest = serde_json::from_value(request)?; // TODO: request_metadata? maybe we should put it in the authorization? @@ -244,7 +243,7 @@ impl Web3Rpcs { let response = self .try_send_best_consensus_head_connection( authorization, - request, + &request, None, None, None, @@ -344,7 +343,7 @@ impl Web3Rpcs { let request: JsonRpcRequest = serde_json::from_value(request)?; let response = self - .try_send_best_consensus_head_connection(authorization, request, None, Some(num), None) + .try_send_best_consensus_head_connection(authorization, &request, None, Some(num), None) .await?; if response.error.is_some() { @@ -446,7 +445,7 @@ impl Web3Rpcs { let consensus_head_block = new_synced_connections.head_block.clone(); let num_consensus_rpcs = new_synced_connections.num_conns(); let num_active_rpcs = consensus_finder.len(); - let total_rpcs = self.by_name.read().len(); + let total_rpcs = self.by_name.load().len(); let old_consensus_head_connections = self .watch_consensus_rpcs_sender diff --git a/web3_proxy/src/rpcs/consensus.rs b/web3_proxy/src/rpcs/consensus.rs index a20f55f9..e6261412 100644 --- a/web3_proxy/src/rpcs/consensus.rs +++ b/web3_proxy/src/rpcs/consensus.rs @@ -3,6 +3,7 @@ use super::many::Web3Rpcs; use super::one::Web3Rpc; use crate::frontend::authorization::Authorization; use crate::frontend::errors::{Web3ProxyErrorContext, Web3ProxyResult}; +use derive_more::Constructor; use ethers::prelude::{H256, U64}; use hashbrown::{HashMap, HashSet}; use itertools::{Itertools, MinMaxResult}; @@ -10,28 +11,146 @@ use log::{trace, warn}; use moka::future::Cache; use serde::Serialize; use std::cmp::Reverse; +use std::collections::BTreeMap; use std::fmt; use std::sync::Arc; use tokio::time::Instant; +#[derive(Clone, Serialize)] +struct RpcData { + head_block_num: U64, + // TODO: this is too simple. erigon has 4 prune levels (hrct) + oldest_block_num: U64, +} + +impl RpcData { + fn new(rpc: &Web3Rpc, head: &Web3ProxyBlock) -> Self { + let head_block_num = *head.number(); + + let block_data_limit = rpc.block_data_limit(); + + let oldest_block_num = head_block_num.saturating_sub(block_data_limit); + + Self { + head_block_num, + oldest_block_num, + } + } + + // TODO: take an enum for the type of data (hrtc) + fn data_available(&self, block_num: &U64) -> bool { + *block_num >= self.oldest_block_num && *block_num <= self.head_block_num + } +} + +#[derive(Constructor, Clone, Copy, Debug, Default, Eq, PartialEq, Serialize)] +pub struct RpcRanking { + tier: u64, + backup: bool, + head_num: Option, +} + +impl RpcRanking { + pub fn add_offset(&self, offset: u64) -> Self { + Self { + tier: self.tier + offset, + backup: self.backup, + head_num: self.head_num, + } + } + + pub fn default_with_backup(backup: bool) -> Self { + Self { + backup, + ..Default::default() + } + } + + fn sort_key(&self) -> (u64, bool, Reverse>) { + // TODO: add soft_limit here? add peak_ewma here? + (self.tier, !self.backup, Reverse(self.head_num)) + } +} + +impl Ord for RpcRanking { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.sort_key().cmp(&other.sort_key()) + } +} + +impl PartialOrd for RpcRanking { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +pub type RankedRpcMap = BTreeMap>>; + /// A collection of Web3Rpcs that are on the same block. 
/// Serialize is so we can print it on our debug endpoint #[derive(Clone, Serialize)] pub struct ConsensusWeb3Rpcs { pub(crate) tier: u64, + pub(crate) backups_needed: bool, pub(crate) head_block: Web3ProxyBlock, pub(crate) best_rpcs: Vec>, - // TODO: functions like "compare_backup_vote()" - // pub(super) backups_voted: Option, - pub(crate) backups_needed: bool, + pub(crate) other_rpcs: RankedRpcMap, + + rpc_data: HashMap, RpcData>, } impl ConsensusWeb3Rpcs { - #[inline(always)] + #[inline] pub fn num_conns(&self) -> usize { self.best_rpcs.len() } + pub fn has_block_data(&self, rpc: &Web3Rpc, block_num: &U64) -> bool { + self.rpc_data + .get(rpc) + .map(|x| x.data_available(block_num)) + .unwrap_or(false) + } + + pub fn filter( + &self, + skip: &[Arc], + min_block_needed: Option<&U64>, + max_block_needed: Option<&U64>, + rpc: &Arc, + ) -> bool { + if skip.contains(rpc) { + trace!("skipping {}", rpc); + return false; + } + + if let Some(min_block_needed) = min_block_needed { + if !self.has_block_data(rpc, min_block_needed) { + trace!( + "{} is missing min_block_needed ({}). skipping", + rpc, + min_block_needed, + ); + return false; + } + } + + if let Some(max_block_needed) = max_block_needed { + if !self.has_block_data(rpc, max_block_needed) { + trace!( + "{} is missing max_block_needed ({}). skipping", + rpc, + max_block_needed, + ); + return false; + } + } + + // we could check hard rate limits here, but i think it is faster to do later + + true + } + // TODO: sum_hard_limit? } @@ -46,6 +165,7 @@ impl fmt::Debug for ConsensusWeb3Rpcs { } } +// TODO: refs for all of these. borrow on a Sender is cheap enough impl Web3Rpcs { // TODO: return a ref? pub fn head_block(&self) -> Option { @@ -93,7 +213,6 @@ pub struct ConsensusFinder { /// `tiers[0] = only tier 0` /// `tiers[1] = tier 0 and tier 1` /// `tiers[n] = tier 0..=n` - /// This is a BTreeMap and not a Vec because sometimes a tier is empty rpc_heads: HashMap, Web3ProxyBlock>, /// never serve blocks that are too old max_block_age: Option, @@ -137,7 +256,7 @@ impl ConsensusFinder { async fn insert(&mut self, rpc: Arc, block: Web3ProxyBlock) -> Option { let first_seen = self .first_seen - .get_with(*block.hash(), async move { Instant::now() }) + .get_with_by_ref(block.hash(), async move { Instant::now() }) .await; // TODO: this should be 0 if we are first seen, but i think it will be slightly non-zero. @@ -166,13 +285,6 @@ impl ConsensusFinder { .await .web3_context("failed caching block")?; - // if let Some(max_block_lag) = max_block_lag { - // if rpc_head_block.number() < ??? { - // trace!("rpc_head_block from {} is too far behind! {}", rpc, rpc_head_block); - // return Ok(self.remove(&rpc).is_some()); - // } - // } - if let Some(max_age) = self.max_block_age { if rpc_head_block.age() > max_age { trace!("rpc_head_block from {} is too old! 
{}", rpc, rpc_head_block); @@ -324,7 +436,7 @@ impl ConsensusFinder { ) -> Option { // sort the primary votes ascending by tier and descending by block num let mut votes: Vec<_> = votes - .iter() + .into_iter() .map(|(block, (rpc_names, sum_soft_limit))| (block, sum_soft_limit, rpc_names)) .collect(); votes.sort_by_cached_key(|(block, sum_soft_limit, rpc_names)| { @@ -366,11 +478,39 @@ impl ConsensusFinder { let backups_needed = consensus_rpcs.iter().any(|x| x.backup); + let mut other_rpcs = BTreeMap::new(); + + for (x, x_head) in self + .rpc_heads + .iter() + .filter(|(k, _)| !consensus_rpcs.contains(k)) + { + let x_head_num = *x_head.number(); + + let key: RpcRanking = RpcRanking::new(x.tier, x.backup, Some(x_head_num)); + + other_rpcs + .entry(key) + .or_insert_with(Vec::new) + .push(x.clone()); + } + + // TODO: how should we populate this? + let mut rpc_data = HashMap::with_capacity(self.rpc_heads.len()); + + for (x, x_head) in self.rpc_heads.iter() { + let y = RpcData::new(x, x_head); + + rpc_data.insert(x.clone(), y); + } + let consensus = ConsensusWeb3Rpcs { tier, head_block: maybe_head_block.clone(), best_rpcs: consensus_rpcs, + other_rpcs, backups_needed, + rpc_data, }; return Some(consensus); diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index dd9c3e1c..9d27ad43 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -9,8 +9,10 @@ use crate::frontend::authorization::{Authorization, RequestMetadata}; use crate::frontend::errors::{Web3ProxyError, Web3ProxyResult}; use crate::frontend::rpc_proxy_ws::ProxyMode; use crate::jsonrpc::{JsonRpcForwardedResponse, JsonRpcRequest}; +use crate::rpcs::consensus::{RankedRpcMap, RpcRanking}; use crate::rpcs::transactions::TxStatus; use anyhow::Context; +use arc_swap::ArcSwap; use counter::Counter; use derive_more::From; use ethers::prelude::{ProviderError, TxHash, H256, U64}; @@ -23,16 +25,15 @@ use log::{debug, error, info, trace, warn, Level}; use migration::sea_orm::DatabaseConnection; use moka::future::{Cache, ConcurrentCacheExt}; use ordered_float::OrderedFloat; -use parking_lot::RwLock; use serde::ser::{SerializeStruct, Serializer}; use serde::Serialize; use serde_json::json; use serde_json::value::RawValue; use std::cmp::{min_by_key, Reverse}; use std::collections::BTreeMap; -use std::sync::atomic::{self, Ordering}; +use std::fmt; +use std::sync::atomic::Ordering; use std::sync::Arc; -use std::{cmp, fmt}; use thread_fast_rng::rand::seq::SliceRandom; use tokio; use tokio::sync::{broadcast, watch}; @@ -44,9 +45,8 @@ pub struct Web3Rpcs { /// if watch_consensus_head_sender is some, Web3Rpc inside self will send blocks here when they get them pub(crate) block_sender: flume::Sender<(Option, Arc)>, /// any requests will be forwarded to one (or more) of these connections - /// TODO: i tried to make this an AsyncRwLock, but then we have trouble serializing it - /// TODO: maybe an ArcSwap would be better. writes are rare - pub(crate) by_name: RwLock>>, + pub(crate) by_name: ArcSwap>>, + /// notify all http providers to check their blocks at the same time pub(crate) http_interval_sender: Option>>, /// all providers with the same consensus head block. won't update if there is no `self.watch_consensus_head_sender` /// TODO: document that this is a watch sender and not a broadcast! 
if things get busy, blocks might get missed @@ -202,11 +202,7 @@ impl Web3Rpcs { let handle = { let connections = connections.clone(); - tokio::spawn(async move { - connections - .subscribe(authorization, block_receiver, pending_tx_sender) - .await - }) + tokio::spawn(connections.subscribe(authorization, block_receiver, pending_tx_sender)) }; Ok((connections, handle, consensus_connections_watcher)) @@ -234,14 +230,13 @@ impl Web3Rpcs { // TODO: will need to think about this more once sum_soft_limit is dynamic let sum_soft_limit = rpc_configs.values().fold(0, |acc, x| acc + x.soft_limit); - // TODO: < is a bit dangerous, we should require a buffer - if sum_soft_limit < self.min_sum_soft_limit { - return Err(anyhow::anyhow!( - "Only {}/{} soft limit! Add more rpcs, increase soft limits, or reduce min_sum_soft_limit.", - sum_soft_limit, - self.min_sum_soft_limit - )); - } + // TODO: require a buffer? + anyhow::ensure!( + sum_soft_limit >= self.min_sum_soft_limit, + "Only {}/{} soft limit! Add more rpcs, increase soft limits, or reduce min_sum_soft_limit.", + sum_soft_limit, + self.min_sum_soft_limit, + ); // turn configs into connections (in parallel) // TODO: move this into a helper function. then we can use it when configs change (will need a remove function too) @@ -270,22 +265,18 @@ impl Web3Rpcs { debug!("spawning {}", server_name); - let handle = tokio::spawn(async move { - server_config - .spawn( - server_name, - db_conn, - vredis_pool, - chain_id, - http_client, - http_interval_sender, - blocks_by_hash, - block_sender, - pending_tx_id_sender, - true, - ) - .await - }); + let handle = tokio::spawn(server_config.spawn( + server_name, + db_conn, + vredis_pool, + chain_id, + http_client, + http_interval_sender, + blocks_by_hash, + block_sender, + pending_tx_id_sender, + true, + )); Some(handle) }) @@ -295,15 +286,21 @@ impl Web3Rpcs { match x { Ok(Ok((rpc, _handle))) => { // web3 connection worked - let old_rpc = self.by_name.write().insert(rpc.name.clone(), rpc.clone()); + let mut new_by_name = (*self.by_name.load_full()).clone(); + + let old_rpc = new_by_name.insert(rpc.name.clone(), rpc.clone()); + + self.by_name.store(Arc::new(new_by_name)); if let Some(old_rpc) = old_rpc { - if old_rpc.head_block.read().is_some() { + if old_rpc.head_block.as_ref().unwrap().borrow().is_some() { + let mut new_head_receiver = + rpc.head_block.as_ref().unwrap().subscribe(); debug!("waiting for new {} to sync", rpc); - // TODO: wait for connection to have a block by watching a channel instead of looping + // TODO: maximum wait time or this could block things for too long - while rpc.head_block.read().is_none() { - sleep(Duration::from_millis(100)).await; + while new_head_receiver.borrow_and_update().is_none() { + new_head_receiver.changed().await?; } } @@ -329,15 +326,15 @@ impl Web3Rpcs { } pub fn get(&self, conn_name: &str) -> Option> { - self.by_name.read().get(conn_name).cloned() + self.by_name.load().get(conn_name).cloned() } pub fn len(&self) -> usize { - self.by_name.read().len() + self.by_name.load().len() } pub fn is_empty(&self) -> bool { - self.by_name.read().is_empty() + self.by_name.load().is_empty() } pub fn min_head_rpcs(&self) -> usize { @@ -506,169 +503,71 @@ impl Web3Rpcs { authorization: &Arc, request_metadata: Option<&Arc>, skip: &[Arc], - // TODO: if we are checking for the consensus head, i don' think we need min_block_needed/max_block_needed min_block_needed: Option<&U64>, max_block_needed: Option<&U64>, ) -> Web3ProxyResult { - let usable_rpcs_by_tier_and_head_number: BTreeMap< - 
(u64, Reverse>), - Vec>, - > = { - let mut m = BTreeMap::new(); + // TODO: use tracing and add this so logs are easy + let request_ulid = request_metadata.map(|x| &x.request_ulid); - if self.watch_consensus_head_sender.is_none() { - // pick any server + let usable_rpcs_by_tier_and_head_number = { + let mut m: RankedRpcMap = BTreeMap::new(); - let key = (0, Reverse(None)); + if let Some(consensus_rpcs) = self.watch_consensus_rpcs_sender.borrow().as_ref() { + // first place is the blocks that are synced close to head. if those don't work. try all the rpcs. if those don't work, keep trying for a few seconds - for x in self.by_name.read().values() { + let head_block = &consensus_rpcs.head_block; + + let head_block_num = *head_block.number(); + + let best_key = RpcRanking::new( + consensus_rpcs.tier, + consensus_rpcs.backups_needed, + Some(head_block_num), + ); + + // todo: for now, build the map m here. once that works, do as much of it as possible while building ConsensusWeb3Rpcs + for x in consensus_rpcs.best_rpcs.iter().filter(|rpc| { + consensus_rpcs.filter(skip, min_block_needed, max_block_needed, rpc) + }) { + m.entry(best_key).or_insert_with(Vec::new).push(x.clone()); + } + + let tier_offset = consensus_rpcs.tier + 1; + + for (k, v) in consensus_rpcs.other_rpcs.iter() { + let v: Vec<_> = v + .iter() + .filter(|rpc| { + consensus_rpcs.filter(skip, min_block_needed, max_block_needed, rpc) + }) + .cloned() + .collect(); + + let offset_ranking = k.add_offset(tier_offset); + + m.entry(offset_ranking).or_insert_with(Vec::new).extend(v); + } + } else if self.watch_consensus_head_sender.is_none() { + trace!("this Web3Rpcs is not tracking head blocks. pick any server"); + + for x in self.by_name.load().values() { if skip.contains(x) { - trace!("skipping: {}", x); + trace!("{:?} - already skipped. {}", request_ulid, x); continue; } - trace!("not skipped!"); + + let key = RpcRanking::default_with_backup(x.backup); m.entry(key).or_insert_with(Vec::new).push(x.clone()); } - } else { - let synced_connections = self.watch_consensus_rpcs_sender.borrow().clone(); - - if synced_connections.is_none() { - return Ok(OpenRequestResult::NotReady); - } - let synced_connections = - synced_connections.expect("synced_connections can't be None here"); - - let head_block_num = synced_connections.head_block.number(); - let head_block_age = synced_connections.head_block.age(); - - // TODO: double check the logic on this. especially if only min is set - let needed_blocks_comparison = match (min_block_needed, max_block_needed) { - (None, None) => { - // no required block given. treat this like they requested the consensus head block - cmp::Ordering::Equal - } - (None, Some(max_block_needed)) => max_block_needed.cmp(head_block_num), - (Some(min_block_needed), None) => min_block_needed.cmp(head_block_num), - (Some(min_block_needed), Some(max_block_needed)) => { - match min_block_needed.cmp(max_block_needed) { - cmp::Ordering::Less | cmp::Ordering::Equal => { - min_block_needed.cmp(head_block_num) - } - cmp::Ordering::Greater => { - // TODO: force a debug log of the original request to see if our logic is wrong? 
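The hunks above also finish the switch of `Web3Rpcs::by_name` from a `RwLock<HashMap<..>>` to an `ArcSwap`: readers take a cheap lock-free snapshot with `load()`, and the rare writer clones the whole map, mutates the clone, and publishes it with `store()`. A minimal sketch of that copy-on-write pattern, with a plain `HashMap<String, String>` and an invented server name standing in for the real `Arc<Web3Rpc>` handles:

    use arc_swap::ArcSwap;
    use std::collections::HashMap;
    use std::sync::Arc;

    fn main() {
        // readers share the current map without taking a lock
        let by_name: ArcSwap<HashMap<String, String>> = ArcSwap::from_pointee(HashMap::new());

        // writer: clone the current map, mutate the clone, then swap it in
        let mut new_by_name = (*by_name.load_full()).clone();
        new_by_name.insert("example-rpc".to_string(), "http://127.0.0.1:8545".to_string());
        by_name.store(Arc::new(new_by_name));

        // reader: a consistent snapshot, even if a writer swaps the map afterwards
        let snapshot = by_name.load();
        assert_eq!(snapshot.len(), 1);
    }

Every insert copies the whole map, which is acceptable here because servers are added rarely while reads happen on every request, and unlike the async lock the removed TODO mentions, the snapshot can be serialized without holding a guard across an await.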
- // TODO: attach the rpc_key_id so we can find the user to ask if they need help - return Err(Web3ProxyError::InvalidBlockBounds { - min: min_block_needed.as_u64(), - max: max_block_needed.as_u64(), - }); - } - } - } - }; - - trace!("needed_blocks_comparison: {:?}", needed_blocks_comparison); - - // collect "usable_rpcs_by_head_num_and_weight" - match needed_blocks_comparison { - cmp::Ordering::Less => { - // need an old block. check all the rpcs. ignore rpcs that are still syncing - trace!("old block needed"); - - let min_block_age = - self.max_block_age.map(|x| head_block_age.saturating_sub(x)); - let min_sync_num = - self.max_block_lag.map(|x| head_block_num.saturating_sub(x)); - - // TODO: cache this somehow? - // TODO: maybe have a helper on synced_connections? that way sum_soft_limits/min_synced_rpcs will be DRY - for x in self - .by_name - .read() - .values() - .filter(|x| { - // TODO: move a bunch of this onto a rpc.is_synced function - #[allow(clippy::if_same_then_else)] - if skip.contains(x) { - // we've already tried this server or have some other reason to skip it - false - } else if max_block_needed - .map(|max_block_needed| !x.has_block_data(max_block_needed)) - .unwrap_or(false) - { - // server does not have the max block - trace!( - "{} does not have the max block ({:?})", - x, - max_block_needed - ); - false - } else { - !min_block_needed - .map(|min_block_needed| !x.has_block_data(min_block_needed)) - .unwrap_or(false) - } - }) - .cloned() - { - let x_head_block = x.head_block.read().clone(); - - if let Some(x_head) = x_head_block { - // TODO: should nodes that are ahead of the consensus block have priority? seems better to spread the load - let x_head_num = x_head.number().min(head_block_num); - - // TODO: do we really need to check head_num and age? - if let Some(min_sync_num) = min_sync_num.as_ref() { - if x_head_num < min_sync_num { - trace!("rpc is still syncing"); - continue; - } - } - if let Some(min_block_age) = min_block_age { - if x_head.age() > min_block_age { - // rpc is still syncing - trace!("server's block is too old"); - continue; - } - } - - let key = (x.tier, Reverse(Some(*x_head_num))); - - m.entry(key).or_insert_with(Vec::new).push(x); - } - } - - // TODO: check min_synced_rpcs and min_sum_soft_limits? or maybe better to just try to serve the request? - } - cmp::Ordering::Equal => { - // using the consensus head block. filter the synced rpcs - - // the key doesn't matter if we are checking synced connections - // they are all at the same block and it is already sized to what we need - let key = (0, Reverse(None)); - - for x in synced_connections.best_rpcs.iter() { - if skip.contains(x) { - trace!("skipping: {}", x); - continue; - } - trace!("not skipped!"); - - m.entry(key).or_insert_with(Vec::new).push(x.clone()); - } - } - cmp::Ordering::Greater => { - // TODO? if the blocks is close, maybe we could wait for change on a watch_consensus_connections_receiver().subscribe() - return Ok(OpenRequestResult::NotReady); - } - } } m }; trace!( - "usable_rpcs_by_tier_and_head_number: {:#?}", + "{:?} - usable_rpcs_by_tier_and_head_number: {:#?}", + request_ulid, usable_rpcs_by_tier_and_head_number ); @@ -689,16 +588,16 @@ impl Web3Rpcs { // pick the first two and try the one with the lower rpc.latency.ewma // TODO: chunks or tuple windows? 
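The ranked map assembled above drives the selection order because `RankedRpcMap` is a `BTreeMap`, which iterates its keys in ascending order, and the key wraps the head block number in `Reverse(Option<_>)`: lower tiers come first, higher head blocks come first within a tier, and servers with no head block sort last (the `test_block_num_sort` test further down checks this ordering). A standalone illustration, with a bare `(tier, Reverse(block_number))` tuple standing in for the real `RpcRanking` key and strings standing in for `Arc<Web3Rpc>`:

    use std::cmp::Reverse;
    use std::collections::BTreeMap;

    fn main() {
        // key: (tier, Reverse(head_block_number)); value: the servers in that bucket
        let mut ranked: BTreeMap<(u64, Reverse<Option<u64>>), Vec<&str>> = BTreeMap::new();

        ranked.entry((0, Reverse(Some(100)))).or_default().push("tier0-at-100");
        ranked.entry((0, Reverse(Some(99)))).or_default().push("tier0-at-99");
        ranked.entry((1, Reverse(Some(100)))).or_default().push("tier1-at-100");
        ranked.entry((0, Reverse(None))).or_default().push("tier0-no-head");

        // BTreeMap iteration is ascending by key, so the best candidates come out first
        let order: Vec<&str> = ranked.values().flatten().copied().collect();

        assert_eq!(
            order,
            ["tier0-at-100", "tier0-at-99", "tier0-no-head", "tier1-at-100"]
        );
    }

This ordering is also why `add_offset(tier_offset)` is enough to keep the non-consensus buckets behind the synced ones; the loop that follows then walks each bucket's candidates pairwise and keeps whichever has the lower `peak_ewma`.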
for (rpc_a, rpc_b) in usable_rpcs.into_iter().circular_tuple_windows() { - trace!("{} vs {}", rpc_a, rpc_b); + trace!("{:?} - {} vs {}", request_ulid, rpc_a, rpc_b); // TODO: cached key to save a read lock // TODO: ties to the server with the smallest block_data_limit let best_rpc = min_by_key(rpc_a, rpc_b, |x| x.peak_ewma()); - trace!("winner: {}", best_rpc); + trace!("{:?} - winner: {}", request_ulid, best_rpc); // just because it has lower latency doesn't mean we are sure to get a connection match best_rpc.try_request_handle(authorization, None).await { Ok(OpenRequestResult::Handle(handle)) => { - // trace!("opened handle: {}", best_rpc); + trace!("{:?} - opened handle: {}", request_ulid, best_rpc); return Ok(OpenRequestResult::Handle(handle)); } Ok(OpenRequestResult::RetryAt(retry_at)) => { @@ -706,10 +605,15 @@ impl Web3Rpcs { } Ok(OpenRequestResult::NotReady) => { // TODO: log a warning? emit a stat? - trace!("best_rpc not ready: {}", best_rpc); + trace!("{:?} - best_rpc not ready: {}", request_ulid, best_rpc); } Err(err) => { - warn!("No request handle for {}. err={:?}", best_rpc, err) + trace!( + "{:?} - No request handle for {}. err={:?}", + request_ulid, + best_rpc, + err + ) } } } @@ -722,6 +626,7 @@ impl Web3Rpcs { match earliest_retry_at { None => { // none of the servers gave us a time to retry at + debug!("no servers on {:?} gave a retry time", self); // TODO: bring this back? need to think about how to do this with `allow_backups` // we could return an error here, but maybe waiting a second will fix the problem @@ -733,8 +638,6 @@ impl Web3Rpcs { // .await?; // Ok(OpenRequestResult::Handle(handle)) - // TODO: should we log here? - Ok(OpenRequestResult::NotReady) } Some(earliest_retry_at) => { @@ -763,7 +666,7 @@ impl Web3Rpcs { let mut max_count = if let Some(max_count) = max_count { max_count } else { - self.by_name.read().len() + self.by_name.load().len() }; trace!("max_count: {}", max_count); @@ -789,7 +692,7 @@ impl Web3Rpcs { // if there aren't enough synced connections, include more connections // TODO: only do this sorting if the synced_rpcs isn't enough - let mut all_rpcs: Vec<_> = self.by_name.read().values().cloned().collect(); + let mut all_rpcs: Vec<_> = self.by_name.load().values().cloned().collect(); all_rpcs.sort_by_cached_key(rpc_sync_status_sort_key); trace!("all_rpcs: {:#?}", all_rpcs); @@ -814,14 +717,14 @@ impl Web3Rpcs { if let Some(block_needed) = min_block_needed { if !rpc.has_block_data(block_needed) { - warn!("{} is missing min_block_needed. skipping", rpc); + trace!("{} is missing min_block_needed. skipping", rpc); continue; } } if let Some(block_needed) = max_block_needed { if !rpc.has_block_data(block_needed) { - warn!("{} is missing max_block_needed. skipping", rpc); + trace!("{} is missing max_block_needed. skipping", rpc); continue; } } @@ -860,7 +763,7 @@ impl Web3Rpcs { pub async fn try_send_best_consensus_head_connection( &self, authorization: &Arc, - request: JsonRpcRequest, + request: &JsonRpcRequest, request_metadata: Option<&Arc>, min_block_needed: Option<&U64>, max_block_needed: Option<&U64>, @@ -870,17 +773,17 @@ impl Web3Rpcs { let mut watch_consensus_connections = self.watch_consensus_rpcs_sender.subscribe(); - // TODO: maximum retries? 
right now its the total number of servers - loop { - if skip_rpcs.len() >= self.by_name.read().len() { - break; - } + let start = Instant::now(); + // TODO: get from config + let max_wait = Duration::from_secs(10); + + while start.elapsed() < max_wait { match self .best_available_rpc( authorization, request_metadata, - &skip_rpcs, + &[], min_block_needed, max_block_needed, ) @@ -890,16 +793,15 @@ impl Web3Rpcs { // save the rpc in case we get an error and want to retry on another server // TODO: look at backend_requests instead let rpc = active_request_handle.clone_connection(); - skip_rpcs.push(rpc.clone()); if let Some(request_metadata) = request_metadata { - request_metadata - .response_from_backup_rpc - .store(rpc.backup, Ordering::Release); - - request_metadata.backend_requests.lock().push(rpc); + request_metadata.backend_requests.lock().push(rpc.clone()); } + let is_backup_response = rpc.backup; + + skip_rpcs.push(rpc); + // TODO: get the log percent from the user data let response_result = active_request_handle .request( @@ -915,6 +817,13 @@ impl Web3Rpcs { request.id.clone(), ) { Ok(response) => { + // TODO: if there are multiple responses being aggregated, this will only use the last server's backup type + if let Some(request_metadata) = request_metadata { + request_metadata + .response_from_backup_rpc + .store(is_backup_response, Ordering::Release); + } + if let Some(error) = response.error.as_ref() { // trace!(?response, "rpc error"); @@ -992,11 +901,19 @@ impl Web3Rpcs { // TODO: if we get a TrySendError, reconnect. wait why do we see a trysenderror on a dual provider? shouldn't it be using reqwest - debug!( - "Backend server error on {}! Retrying on another. err={:#?}", - rpc, err + trace!( + "Backend server error on {}! Retrying {:?} on another. err={:?}", + rpc, + request, + err ); + if let Some(ref hard_limit_until) = rpc.hard_limit_until { + let retry_at = Instant::now() + Duration::from_secs(1); + + hard_limit_until.send_replace(retry_at); + } + continue; } } @@ -1033,10 +950,10 @@ impl Web3Rpcs { if watch_for_block(waiting_for, &mut watch_consensus_connections).await? { // block found! continue so we can check for another rpc - continue; } else { - // block won't be found without new servers being added - break; + // rate limits are likely keeping us from serving the head block + watch_consensus_connections.changed().await?; + watch_consensus_connections.borrow_and_update(); } } } @@ -1055,7 +972,7 @@ impl Web3Rpcs { return Ok(r); } - let num_conns = self.by_name.read().len(); + let num_conns = self.by_name.load().len(); let num_skipped = skip_rpcs.len(); let needed = min_block_needed.max(max_block_needed); @@ -1071,13 +988,15 @@ impl Web3Rpcs { "No servers synced (min {:?}, max {:?}, head {:?}) ({} known)", min_block_needed, max_block_needed, head_block_num, num_conns ); - } else if head_block_num > needed.copied() { + } else if head_block_num.as_ref() > needed { // we have synced past the needed block + // TODO: this is likely caused by rate limits. make the error message better error!( "No archive servers synced (min {:?}, max {:?}, head {:?}) ({} known)", min_block_needed, max_block_needed, head_block_num, num_conns ); } else if num_skipped == 0 { + // TODO: what should we log? 
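The retry logic above also trades the old "try each server at most once" bound for a wall-clock budget: keep asking `best_available_rpc` until a response comes back or `max_wait` elapses. A minimal sketch of that shape, with an invented `try_once` helper standing in for the pick-a-server-and-send step (the ten second budget mirrors the hard-coded value that the TODO says should come from config):

    use std::time::Duration;
    use tokio::time::{sleep, Instant};

    // stand-in for best_available_rpc plus the actual request; fails until the third attempt
    async fn try_once(attempt: u32) -> Result<&'static str, &'static str> {
        if attempt < 3 {
            Err("no server ready")
        } else {
            Ok("response")
        }
    }

    #[tokio::main]
    async fn main() {
        let start = Instant::now();
        let max_wait = Duration::from_secs(10);

        let mut attempt = 0;
        let result = loop {
            if start.elapsed() >= max_wait {
                // the real code falls through to the "no servers synced" error handling here
                break Err("gave up before the deadline");
            }

            match try_once(attempt).await {
                Ok(r) => break Ok(r),
                Err(_) => {
                    attempt += 1;
                    // back off briefly before trying another server
                    sleep(Duration::from_millis(100)).await;
                }
            }
        };

        assert_eq!(result, Ok("response"));
    }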
} else { error!( "Requested data is not available (min {:?}, max {:?}, head {:?}) ({} skipped, {} known)", @@ -1092,7 +1011,7 @@ impl Web3Rpcs { Ok(JsonRpcForwardedResponse::from_str( "Requested data is not available", Some(-32043), - Some(request.id), + Some(request.id.clone()), )) } @@ -1111,7 +1030,12 @@ impl Web3Rpcs { ) -> Web3ProxyResult { let mut watch_consensus_rpcs = self.watch_consensus_rpcs_sender.subscribe(); - loop { + let start = Instant::now(); + + // TODO: get from config + let max_wait = Duration::from_secs(3); + + while start.elapsed() < max_wait { match self .all_connections( authorization, @@ -1123,20 +1047,16 @@ impl Web3Rpcs { .await { Ok(active_request_handles) => { - // TODO: benchmark this compared to waiting on unbounded futures - // TODO: do something with this handle? - // TODO: this is not working right. simplify - if let Some(request_metadata) = request_metadata { - let mut backup_used = false; + let mut only_backups_used = true; request_metadata.backend_requests.lock().extend( active_request_handles.iter().map(|x| { let rpc = x.clone_connection(); - if rpc.backup { + if !rpc.backup { // TODO: its possible we serve from a synced connection though. think about this more - backup_used = true; + only_backups_used = false; } x.clone_connection() @@ -1145,7 +1065,7 @@ impl Web3Rpcs { request_metadata .response_from_backup_rpc - .store(true, Ordering::Release); + .store(only_backups_used, Ordering::Release); } return self @@ -1195,12 +1115,14 @@ impl Web3Rpcs { } } } + + Err(Web3ProxyError::NoServersSynced) } pub async fn try_proxy_connection( &self, authorization: &Arc, - request: JsonRpcRequest, + request: &JsonRpcRequest, request_metadata: Option<&Arc>, min_block_needed: Option<&U64>, max_block_needed: Option<&U64>, @@ -1239,14 +1161,14 @@ impl Serialize for Web3Rpcs { let mut state = serializer.serialize_struct("Web3Rpcs", 6)?; { - let by_name = self.by_name.read(); + let by_name = self.by_name.load(); let rpcs: Vec<&Web3Rpc> = by_name.values().map(|x| x.as_ref()).collect(); // TODO: coordinate with frontend team to rename "conns" to "rpcs" state.serialize_field("conns", &rpcs)?; } { - let consensus_rpcs = self.watch_consensus_rpcs_sender.borrow(); + let consensus_rpcs = self.watch_consensus_rpcs_sender.borrow().clone(); // TODO: rename synced_connections to consensus_rpcs if let Some(consensus_rpcs) = consensus_rpcs.as_ref() { @@ -1273,25 +1195,13 @@ impl Serialize for Web3Rpcs { fn rpc_sync_status_sort_key(x: &Arc) -> (Reverse, u64, bool, OrderedFloat) { let head_block = x .head_block - .read() .as_ref() - .map(|x| *x.number()) + .and_then(|x| x.borrow().as_ref().map(|x| *x.number())) .unwrap_or_default(); let tier = x.tier; - // TODO: use request latency instead of head latency - // TODO: have the latency decay automatically - let peak_latency = x - .peak_latency - .as_ref() - .expect("peak_latency uniniialized") - .latency(); - - let active_requests = x.active_requests.load(atomic::Ordering::Relaxed) as f64; - - // TODO: i'm not sure head * active is exactly right. 
but we'll see - let peak_ewma = OrderedFloat(peak_latency.as_millis() as f64 * (active_requests + 1.0)); + let peak_ewma = x.peak_ewma(); let backup = x.backup; @@ -1306,7 +1216,7 @@ mod tests { use super::*; use crate::rpcs::consensus::ConsensusFinder; use crate::rpcs::{blockchain::Web3ProxyBlock, provider::Web3Provider}; - + use arc_swap::ArcSwap; use ethers::types::{Block, U256}; use latency::PeakEwmaLatency; use log::{trace, LevelFilter}; @@ -1344,46 +1254,53 @@ mod tests { .map(|x| Web3ProxyBlock::try_new(Arc::new(x)).unwrap()) .collect(); + let (tx_a, _) = watch::channel(None); + let (tx_b, _) = watch::channel(blocks.get(1).cloned()); + let (tx_c, _) = watch::channel(blocks.get(2).cloned()); + let (tx_d, _) = watch::channel(None); + let (tx_e, _) = watch::channel(blocks.get(1).cloned()); + let (tx_f, _) = watch::channel(blocks.get(2).cloned()); + let mut rpcs: Vec<_> = [ Web3Rpc { name: "a".to_string(), tier: 0, - head_block: RwLock::new(None), + head_block: Some(tx_a), peak_latency: Some(new_peak_latency()), ..Default::default() }, Web3Rpc { name: "b".to_string(), tier: 0, - head_block: RwLock::new(blocks.get(1).cloned()), + head_block: Some(tx_b), peak_latency: Some(new_peak_latency()), ..Default::default() }, Web3Rpc { name: "c".to_string(), tier: 0, - head_block: RwLock::new(blocks.get(2).cloned()), + head_block: Some(tx_c), peak_latency: Some(new_peak_latency()), ..Default::default() }, Web3Rpc { name: "d".to_string(), tier: 1, - head_block: RwLock::new(None), + head_block: Some(tx_d), peak_latency: Some(new_peak_latency()), ..Default::default() }, Web3Rpc { name: "e".to_string(), tier: 1, - head_block: RwLock::new(blocks.get(1).cloned()), + head_block: Some(tx_e), peak_latency: Some(new_peak_latency()), ..Default::default() }, Web3Rpc { name: "f".to_string(), tier: 1, - head_block: RwLock::new(blocks.get(2).cloned()), + head_block: Some(tx_f), peak_latency: Some(new_peak_latency()), ..Default::default() }, @@ -1428,11 +1345,10 @@ mod tests { let lagged_block = Arc::new(lagged_block); let head_block = Arc::new(head_block); - let mut lagged_block: Web3ProxyBlock = lagged_block.try_into().unwrap(); - let mut head_block: Web3ProxyBlock = head_block.try_into().unwrap(); - let block_data_limit = u64::MAX; + let (tx_synced, _) = watch::channel(None); + let head_rpc = Web3Rpc { name: "synced".to_string(), soft_limit: 1_000, @@ -1440,12 +1356,14 @@ mod tests { backup: false, block_data_limit: block_data_limit.into(), tier: 0, - head_block: RwLock::new(Some(head_block.clone())), + head_block: Some(tx_synced), provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))), peak_latency: Some(new_peak_latency()), ..Default::default() }; + let (tx_lagged, _) = watch::channel(None); + let lagged_rpc = Web3Rpc { name: "lagged".to_string(), soft_limit: 1_000, @@ -1453,17 +1371,17 @@ mod tests { backup: false, block_data_limit: block_data_limit.into(), tier: 0, - head_block: RwLock::new(Some(lagged_block.clone())), + head_block: Some(tx_lagged), provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))), peak_latency: Some(new_peak_latency()), ..Default::default() }; - assert!(head_rpc.has_block_data(lagged_block.number())); - assert!(head_rpc.has_block_data(head_block.number())); + assert!(!head_rpc.has_block_data(lagged_block.number.as_ref().unwrap())); + assert!(!head_rpc.has_block_data(head_block.number.as_ref().unwrap())); - assert!(lagged_rpc.has_block_data(lagged_block.number())); - assert!(!lagged_rpc.has_block_data(head_block.number())); + 
assert!(!lagged_rpc.has_block_data(lagged_block.number.as_ref().unwrap())); + assert!(!lagged_rpc.has_block_data(head_block.number.as_ref().unwrap())); let head_rpc = Arc::new(head_rpc); let lagged_rpc = Arc::new(lagged_rpc); @@ -1475,15 +1393,13 @@ mod tests { let (block_sender, _block_receiver) = flume::unbounded(); let (pending_tx_id_sender, pending_tx_id_receiver) = flume::unbounded(); - let (watch_consensus_rpcs_sender, _watch_consensus_rpcs_receiver) = - watch::channel(Default::default()); - let (watch_consensus_head_sender, _watch_consensus_head_receiver) = - watch::channel(Default::default()); + let (watch_consensus_rpcs_sender, _watch_consensus_rpcs_receiver) = watch::channel(None); + let (watch_consensus_head_sender, _watch_consensus_head_receiver) = watch::channel(None); // TODO: make a Web3Rpcs::new let rpcs = Web3Rpcs { - block_sender, - by_name: RwLock::new(rpcs_by_name), + block_sender: block_sender.clone(), + by_name: ArcSwap::from_pointee(rpcs_by_name), http_interval_sender: None, watch_consensus_head_sender: Some(watch_consensus_head_sender), watch_consensus_rpcs_sender, @@ -1539,9 +1455,15 @@ mod tests { 2 ); - // best_synced_backend_connection requires servers to be synced with the head block + // best_synced_backend_connection which servers to be synced with the head block should not find any nodes let x = rpcs - .best_available_rpc(&authorization, None, &[], None, None) + .best_available_rpc( + &authorization, + None, + &[], + Some(head_block.number.as_ref().unwrap()), + None, + ) .await .unwrap(); @@ -1550,37 +1472,74 @@ mod tests { assert!(matches!(x, OpenRequestResult::NotReady)); // add lagged blocks to the rpcs. both servers should be allowed - lagged_block = rpcs.try_cache_block(lagged_block, true).await.unwrap(); + lagged_rpc + .send_head_block_result( + Ok(Some(lagged_block.clone())), + &block_sender, + rpcs.blocks_by_hash.clone(), + ) + .await + .unwrap(); + // TODO: this is fragile rpcs.process_block_from_rpc( &authorization, &mut consensus_finder, - Some(lagged_block.clone()), - lagged_rpc, + Some(lagged_block.clone().try_into().unwrap()), + lagged_rpc.clone(), &None, ) .await .unwrap(); + + head_rpc + .send_head_block_result( + Ok(Some(lagged_block.clone())), + &block_sender, + rpcs.blocks_by_hash.clone(), + ) + .await + .unwrap(); + + // TODO: this is fragile rpcs.process_block_from_rpc( &authorization, &mut consensus_finder, - Some(lagged_block.clone()), + Some(lagged_block.clone().try_into().unwrap()), head_rpc.clone(), &None, ) .await .unwrap(); + // TODO: how do we spawn this and wait for it to process things? subscribe and watch consensus connections? + // rpcs.process_incoming_blocks(&authorization, block_receiver, pending_tx_sender) + + assert!(head_rpc.has_block_data(lagged_block.number.as_ref().unwrap())); + assert!(!head_rpc.has_block_data(head_block.number.as_ref().unwrap())); + + assert!(lagged_rpc.has_block_data(lagged_block.number.as_ref().unwrap())); + assert!(!lagged_rpc.has_block_data(head_block.number.as_ref().unwrap())); + + // todo!("this doesn't work anymore. send_head_block_result doesn't do anything when rpcs isn't watching the block_receiver") assert_eq!(rpcs.num_synced_rpcs(), 2); // add head block to the rpcs. 
lagged_rpc should not be available - head_block = rpcs.try_cache_block(head_block, true).await.unwrap(); + head_rpc + .send_head_block_result( + Ok(Some(head_block.clone())), + &block_sender, + rpcs.blocks_by_hash.clone(), + ) + .await + .unwrap(); + // TODO: this is fragile rpcs.process_block_from_rpc( &authorization, &mut consensus_finder, - Some(head_block.clone()), - head_rpc, + Some(head_block.clone().try_into().unwrap()), + head_rpc.clone(), &None, ) .await @@ -1588,18 +1547,27 @@ mod tests { assert_eq!(rpcs.num_synced_rpcs(), 1); + assert!(head_rpc.has_block_data(lagged_block.number.as_ref().unwrap())); + assert!(head_rpc.has_block_data(head_block.number.as_ref().unwrap())); + + assert!(lagged_rpc.has_block_data(lagged_block.number.as_ref().unwrap())); + assert!(!lagged_rpc.has_block_data(head_block.number.as_ref().unwrap())); + + // TODO: make sure the handle is for the expected rpc assert!(matches!( rpcs.best_available_rpc(&authorization, None, &[], None, None) .await, Ok(OpenRequestResult::Handle(_)) )); + // TODO: make sure the handle is for the expected rpc assert!(matches!( rpcs.best_available_rpc(&authorization, None, &[], Some(&0.into()), None) .await, Ok(OpenRequestResult::Handle(_)) )); + // TODO: make sure the handle is for the expected rpc assert!(matches!( rpcs.best_available_rpc(&authorization, None, &[], Some(&1.into()), None) .await, @@ -1634,6 +1602,8 @@ mod tests { let head_block: Web3ProxyBlock = Arc::new(head_block).try_into().unwrap(); + let (tx_pruned, _) = watch::channel(Some(head_block.clone())); + let pruned_rpc = Web3Rpc { name: "pruned".to_string(), soft_limit: 3_000, @@ -1641,11 +1611,13 @@ mod tests { backup: false, block_data_limit: 64.into(), tier: 1, - head_block: RwLock::new(Some(head_block.clone())), + head_block: Some(tx_pruned), provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))), ..Default::default() }; + let (tx_archive, _) = watch::channel(Some(head_block.clone())); + let archive_rpc = Web3Rpc { name: "archive".to_string(), soft_limit: 1_000, @@ -1653,7 +1625,7 @@ mod tests { backup: false, block_data_limit: u64::MAX.into(), tier: 2, - head_block: RwLock::new(Some(head_block.clone())), + head_block: Some(tx_archive), provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))), ..Default::default() }; @@ -1673,14 +1645,13 @@ mod tests { let (block_sender, _) = flume::unbounded(); let (pending_tx_id_sender, pending_tx_id_receiver) = flume::unbounded(); - let (watch_consensus_rpcs_sender, _) = watch::channel(Default::default()); - let (watch_consensus_head_sender, _watch_consensus_head_receiver) = - watch::channel(Default::default()); + let (watch_consensus_rpcs_sender, _) = watch::channel(None); + let (watch_consensus_head_sender, _watch_consensus_head_receiver) = watch::channel(None); // TODO: make a Web3Rpcs::new let rpcs = Web3Rpcs { block_sender, - by_name: RwLock::new(rpcs_by_name), + by_name: ArcSwap::from_pointee(rpcs_by_name), http_interval_sender: None, watch_consensus_head_sender: Some(watch_consensus_head_sender), watch_consensus_rpcs_sender, @@ -1791,6 +1762,9 @@ mod tests { let block_1: Web3ProxyBlock = Arc::new(block_1).try_into().unwrap(); let block_2: Web3ProxyBlock = Arc::new(block_2).try_into().unwrap(); + let (tx_mock_geth, _) = watch::channel(Some(block_1.clone())); + let (tx_mock_erigon_archive, _) = watch::channel(Some(block_2.clone())); + let mock_geth = Web3Rpc { name: "mock_geth".to_string(), soft_limit: 1_000, @@ -1798,7 +1772,7 @@ mod tests { backup: false, block_data_limit: 64.into(), tier: 0, - 
head_block: RwLock::new(Some(block_1.clone())), + head_block: Some(tx_mock_geth), provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))), peak_latency: Some(new_peak_latency()), ..Default::default() @@ -1811,7 +1785,7 @@ mod tests { backup: false, block_data_limit: u64::MAX.into(), tier: 1, - head_block: RwLock::new(Some(block_2.clone())), + head_block: Some(tx_mock_erigon_archive), provider: AsyncRwLock::new(Some(Arc::new(Web3Provider::Mock))), peak_latency: Some(new_peak_latency()), ..Default::default() @@ -1835,14 +1809,13 @@ mod tests { let (block_sender, _) = flume::unbounded(); let (pending_tx_id_sender, pending_tx_id_receiver) = flume::unbounded(); - let (watch_consensus_rpcs_sender, _) = watch::channel(Default::default()); - let (watch_consensus_head_sender, _watch_consensus_head_receiver) = - watch::channel(Default::default()); + let (watch_consensus_rpcs_sender, _) = watch::channel(None); + let (watch_consensus_head_sender, _watch_consensus_head_receiver) = watch::channel(None); // TODO: make a Web3Rpcs::new let rpcs = Web3Rpcs { block_sender, - by_name: RwLock::new(rpcs_by_name), + by_name: ArcSwap::from_pointee(rpcs_by_name), http_interval_sender: None, watch_consensus_head_sender: Some(watch_consensus_head_sender), watch_consensus_rpcs_sender, @@ -1943,6 +1916,8 @@ async fn watch_for_block( // we are past this block and no servers have this block // this happens if the block is old and all archive servers are offline // there is no chance we will get this block without adding an archive server to the config + + // TODO: i think this can also happen if we are being rate limited! return Ok(false); } } @@ -1976,3 +1951,23 @@ async fn watch_for_block( Ok(true) } + +#[cfg(test)] +mod test { + use std::cmp::Reverse; + + #[test] + fn test_block_num_sort() { + let test_vec = vec![ + Reverse(Some(3)), + Reverse(Some(2)), + Reverse(Some(1)), + Reverse(None), + ]; + + let mut sorted_vec = test_vec.clone(); + sorted_vec.sort(); + + assert_eq!(test_vec, sorted_vec); + } +} diff --git a/web3_proxy/src/rpcs/one.rs b/web3_proxy/src/rpcs/one.rs index 71b67e88..c541c0da 100644 --- a/web3_proxy/src/rpcs/one.rs +++ b/web3_proxy/src/rpcs/one.rs @@ -46,15 +46,15 @@ pub struct Web3Rpc { /// provider is in a RwLock so that we can replace it if re-connecting /// it is an async lock because we hold it open across awaits /// this provider is only used for new heads subscriptions - /// TODO: watch channel instead of a lock - /// TODO: is this only used for new heads subscriptions? if so, rename + /// TODO: benchmark ArcSwapOption and a watch::Sender pub(super) provider: AsyncRwLock>>, - /// keep track of hard limits. Optional because we skip this code for our own servers. + /// keep track of hard limits + /// this is only inside an Option so that the "Default" derive works. it will always be set. 
pub(super) hard_limit_until: Option>, /// rate limits are stored in a central redis so that multiple proxies can share their rate limits /// We do not use the deferred rate limiter because going over limits would cause errors pub(super) hard_limit: Option, - /// used for load balancing to the least loaded server + /// used for ensuring enough requests are available before advancing the head block pub(super) soft_limit: u32, /// use web3 queries to find the block data limit for archive/pruned nodes pub(super) automatic_block_limit: bool, @@ -65,7 +65,8 @@ pub struct Web3Rpc { /// Lower tiers are higher priority when sending requests pub(super) tier: u64, /// TODO: change this to a watch channel so that http providers can subscribe and take action on change. - pub(super) head_block: RwLock>, + /// this is only inside an Option so that the "Default" derive works. it will always be set. + pub(super) head_block: Option>>, /// Track head block latency pub(super) head_latency: RwLock, /// Track peak request latency @@ -96,8 +97,6 @@ impl Web3Rpc { // TODO: rename to http_new_head_interval_sender? http_interval_sender: Option>>, redis_pool: Option, - // TODO: think more about soft limit. watching ewma of requests is probably better. but what should the random sort be on? maybe group on tier is enough - // soft_limit: u32, block_map: BlocksByHashCache, block_sender: Option>, tx_id_sender: Option)>>, @@ -139,15 +138,9 @@ impl Web3Rpc { let automatic_block_limit = (block_data_limit.load(atomic::Ordering::Acquire) == 0) && block_sender.is_some(); - // track hard limit until on backup servers (which might surprise us with rate limit changes) + // have a sender for tracking hard limit anywhere. we use this in case we // and track on servers that have a configured hard limit - let hard_limit_until = if backup || hard_limit.is_some() { - let (sender, _) = watch::channel(Instant::now()); - - Some(sender) - } else { - None - }; + let (hard_limit_until, _) = watch::channel(Instant::now()); if config.ws_url.is_none() && config.http_url.is_none() { if let Some(url) = config.url { @@ -168,6 +161,8 @@ impl Web3Rpc { let (disconnect_sender, disconnect_receiver) = watch::channel(false); let reconnect = reconnect.into(); + let (head_block, _) = watch::channel(None); + // Spawn the task for calculting average peak latency // TODO Should these defaults be in config let peak_latency = PeakEwmaLatency::spawn( @@ -200,7 +195,7 @@ impl Web3Rpc { ws_url, http_url, hard_limit, - hard_limit_until, + hard_limit_until: Some(hard_limit_until), soft_limit: config.soft_limit, automatic_block_limit, backup, @@ -209,7 +204,7 @@ impl Web3Rpc { tier: config.tier, disconnect_watch: Some(disconnect_sender), created_at: Some(created_at), - head_block: RwLock::new(Default::default()), + head_block: Some(head_block), peak_latency: Some(peak_latency), ..Default::default() }; @@ -352,8 +347,9 @@ impl Web3Rpc { self.block_data_limit.load(atomic::Ordering::Acquire).into() } + /// TODO: get rid of this now that consensus rpcs does it pub fn has_block_data(&self, needed_block_num: &U64) -> bool { - let head_block_num = match self.head_block.read().as_ref() { + let head_block_num = match self.head_block.as_ref().unwrap().borrow().as_ref() { None => return false, Some(x) => *x.number(), }; @@ -483,8 +479,10 @@ impl Web3Rpc { } // reset sync status - let mut head_block = self.head_block.write(); - *head_block = None; + self.head_block + .as_ref() + .expect("head_block should always be set") + .send_replace(None); // disconnect the current 
provider // TODO: what until the block_sender's receiver finishes updating this item? @@ -587,7 +585,7 @@ impl Web3Rpc { Ok(()) } - async fn send_head_block_result( + pub(crate) async fn send_head_block_result( self: &Arc, new_head_block: Result, ProviderError>, block_sender: &flume::Sender, @@ -596,9 +594,9 @@ impl Web3Rpc { let new_head_block = match new_head_block { Ok(None) => { { - let mut head_block = self.head_block.write(); + let head_block_tx = self.head_block.as_ref().unwrap(); - if head_block.is_none() { + if head_block_tx.borrow().is_none() { // we previously sent a None. return early return Ok(()); } @@ -607,7 +605,7 @@ impl Web3Rpc { debug!("clearing head block on {} ({}ms old)!", self, age); - *head_block = None; + head_block_tx.send_replace(None); } None @@ -625,11 +623,10 @@ impl Web3Rpc { // save the block so we don't send the same one multiple times // also save so that archive checks can know how far back to query - { - let mut head_block = self.head_block.write(); - - let _ = head_block.insert(new_head_block.clone()); - } + self.head_block + .as_ref() + .unwrap() + .send_replace(Some(new_head_block.clone())); if self.block_data_limit() == U64::zero() { let authorization = Arc::new(Authorization::internal(self.db_conn.clone())?); @@ -646,11 +643,7 @@ impl Web3Rpc { Err(err) => { warn!("unable to get block from {}. err={:?}", self, err); - { - let mut head_block = self.head_block.write(); - - *head_block = None; - } + self.head_block.as_ref().unwrap().send_replace(None); None } @@ -750,7 +743,7 @@ impl Web3Rpc { if new_total_requests - old_total_requests < 10 { // TODO: if this fails too many times, reset the connection // TODO: move this into a function and the chaining should be easier - let head_block = rpc.head_block.read().clone(); + let head_block = rpc.head_block.as_ref().unwrap().borrow().clone(); if let Some((block_number, txid)) = head_block.and_then(|x| { let block = x.block; @@ -947,16 +940,25 @@ impl Web3Rpc { .await?; } Ok(Some(block)) => { - // don't send repeat blocks - let new_hash = - block.hash.expect("blocks here should always have hashes"); + if let Some(new_hash) = block.hash { + // don't send repeat blocks + if new_hash != last_hash { + // new hash! + last_hash = new_hash; - if new_hash != last_hash { - // new hash! - last_hash = new_hash; + self.send_head_block_result( + Ok(Some(block)), + &block_sender, + block_map.clone(), + ) + .await?; + } + } else { + // TODO: why is this happening? + warn!("empty head block on {}", self); self.send_head_block_result( - Ok(Some(block)), + Ok(None), &block_sender, block_map.clone(), ) @@ -1387,7 +1389,12 @@ impl Serialize for Web3Rpc { state.serialize_field("soft_limit", &self.soft_limit)?; // TODO: maybe this is too much data. serialize less? 
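These hunks complete the conversion of `head_block` from a `RwLock<Option<Web3ProxyBlock>>` into a `tokio::sync::watch` channel (wrapped in `Option` only so the `Default` derive still works): writers publish with `send_replace`, and readers either peek with `borrow` or wait for the next update with `borrow_and_update` and `changed`, which is what replaced the 100 ms polling loop removed from `many.rs`. A small self-contained sketch of both halves, with a bare `u64` standing in for the real block type:

    use tokio::sync::watch;

    #[tokio::main]
    async fn main() {
        // the sender lives on the rpc; anything that cares about the head block subscribes
        let (head_block_tx, _) = watch::channel::<Option<u64>>(None);
        let mut head_block_rx = head_block_tx.subscribe();

        let waiter = tokio::spawn(async move {
            // wait until the rpc reports a head block, like the "waiting for new {} to sync" loop
            while head_block_rx.borrow_and_update().is_none() {
                head_block_rx.changed().await.expect("sender dropped");
            }
            *head_block_rx.borrow()
        });

        // publish a new head; send_replace works even with no receivers listening
        head_block_tx.send_replace(Some(17_000_000));
        assert_eq!(waiter.await.unwrap(), Some(17_000_000));

        // clearing the head block after an error is just another send_replace
        head_block_tx.send_replace(None);
        assert!(head_block_tx.borrow().is_none());
    }

A watch channel only keeps the latest value, which is exactly the semantics a head block wants: subscribers that fall behind simply see the newest head instead of a backlog.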
- state.serialize_field("head_block", &*self.head_block.read())?; + { + let head_block = self.head_block.as_ref().unwrap(); + let head_block = head_block.borrow(); + let head_block = head_block.as_ref(); + state.serialize_field("head_block", &head_block)?; + } state.serialize_field("head_latency", &self.head_latency.read().value())?; @@ -1445,6 +1452,8 @@ mod tests { let head_block = Web3ProxyBlock::try_new(random_block).unwrap(); let block_data_limit = u64::MAX; + let (tx, _) = watch::channel(Some(head_block.clone())); + let x = Web3Rpc { name: "name".to_string(), ws_url: Some("ws://example.com".parse::().unwrap()), @@ -1453,7 +1462,7 @@ mod tests { backup: false, block_data_limit: block_data_limit.into(), tier: 0, - head_block: RwLock::new(Some(head_block.clone())), + head_block: Some(tx), ..Default::default() }; @@ -1479,6 +1488,8 @@ mod tests { let block_data_limit = 64; + let (tx, _rx) = watch::channel(Some(head_block.clone())); + let x = Web3Rpc { name: "name".to_string(), soft_limit: 1_000, @@ -1486,7 +1497,7 @@ mod tests { backup: false, block_data_limit: block_data_limit.into(), tier: 0, - head_block: RwLock::new(Some(head_block.clone())), + head_block: Some(tx), ..Default::default() }; diff --git a/web3_proxy/src/stats/db_queries.rs b/web3_proxy/src/stats/db_queries.rs index ccc9404a..8830cbf9 100644 --- a/web3_proxy/src/stats/db_queries.rs +++ b/web3_proxy/src/stats/db_queries.rs @@ -273,6 +273,7 @@ pub async fn query_user_stats<'a>( .expect("max-age should always parse"), ); + // TODO: get this from `response` isntead of json serializing twice let cache_body = json!(response_body).to_string(); if let Err(err) = redis_conn diff --git a/web3_proxy/src/stats/influxdb_queries.rs b/web3_proxy/src/stats/influxdb_queries.rs index d946dc2d..29747c65 100644 --- a/web3_proxy/src/stats/influxdb_queries.rs +++ b/web3_proxy/src/stats/influxdb_queries.rs @@ -125,7 +125,7 @@ pub async fn query_user_stats<'a>( user_rpc_keys.append(&mut subuser_rpc_keys); - if user_rpc_keys.len() == 0 { + if user_rpc_keys.is_empty() { return Err(Web3ProxyError::BadRequest( "User has no secret RPC keys yet".to_string(), )); diff --git a/web3_proxy/src/stats/mod.rs b/web3_proxy/src/stats/mod.rs index af148e7d..930f9e04 100644 --- a/web3_proxy/src/stats/mod.rs +++ b/web3_proxy/src/stats/mod.rs @@ -2,36 +2,36 @@ //! TODO: move some of these structs/functions into their own file? 
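The remainder of this file reorganizes stat aggregation around `backend_rpcs_used`. Stripped of the database and InfluxDB plumbing, the per-key bookkeeping done by `BufferedRpcQueryStats::add` further down reduces to the fold sketched below (the struct names are trimmed stand-ins and only a few of the real fields are shown):

    // trimmed stand-ins for RpcQueryStats and BufferedRpcQueryStats
    struct Stat {
        // how many backend rpcs served this one frontend request
        backend_rpcs_used: usize,
        response_bytes: u64,
    }

    #[derive(Default)]
    struct Buffered {
        frontend_requests: u64,
        backend_requests: u64,
        cache_hits: u64,
        cache_misses: u64,
        sum_response_bytes: u64,
    }

    impl Buffered {
        fn add(&mut self, stat: Stat) {
            // a stat always represents exactly one frontend request
            self.frontend_requests += 1;

            if stat.backend_rpcs_used == 0 {
                // nothing upstream was queried, so this was a cache hit
                self.cache_hits += 1;
            } else {
                self.cache_misses += 1;
                // a single frontend request may fan out to several backend requests
                self.backend_requests += stat.backend_rpcs_used as u64;
            }

            self.sum_response_bytes += stat.response_bytes;
        }
    }

    fn main() {
        let mut buffered = Buffered::default();
        buffered.add(Stat { backend_rpcs_used: 0, response_bytes: 512 });
        buffered.add(Stat { backend_rpcs_used: 2, response_bytes: 2048 });

        assert_eq!(buffered.frontend_requests, 2);
        assert_eq!(buffered.cache_hits, 1);
        assert_eq!(buffered.cache_misses, 1);
        assert_eq!(buffered.backend_requests, 2);
        assert_eq!(buffered.sum_response_bytes, 2560);
    }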
pub mod db_queries; pub mod influxdb_queries; -use crate::app::AuthorizationChecks; +mod stat_buffer; + +pub use stat_buffer::{SpawnedStatBuffer, StatBuffer}; + +use crate::app::RpcSecretKeyCache; use crate::frontend::authorization::{Authorization, RequestMetadata}; +use crate::frontend::errors::Web3ProxyError; +use crate::rpcs::one::Web3Rpc; use anyhow::Context; use axum::headers::Origin; use chrono::{DateTime, Months, TimeZone, Utc}; use derive_more::From; use entities::sea_orm_active_enums::TrackingLevel; use entities::{balance, referee, referrer, rpc_accounting_v2, rpc_key, user, user_tier}; -use futures::stream; -use hashbrown::HashMap; -use influxdb2::api::write::TimestampPrecision; use influxdb2::models::DataPoint; -use log::{error, info, trace, warn}; +use log::{trace, warn}; use migration::sea_orm::prelude::Decimal; -use migration::sea_orm::ActiveModelTrait; -use migration::sea_orm::ColumnTrait; -use migration::sea_orm::IntoActiveModel; -use migration::sea_orm::{self, DatabaseConnection, EntityTrait, QueryFilter}; +use migration::sea_orm::{ + self, ActiveModelTrait, ColumnTrait, DatabaseConnection, EntityTrait, IntoActiveModel, + QueryFilter, +}; use migration::{Expr, OnConflict}; -use moka::future::Cache; use num_traits::ToPrimitive; +use parking_lot::Mutex; use std::cmp::max; use std::num::NonZeroU64; -use std::sync::atomic::Ordering; +use std::sync::atomic::{self, Ordering}; use std::sync::Arc; -use std::time::Duration; -use tokio::sync::broadcast; -use tokio::task::JoinHandle; -use tokio::time::interval; -use ulid::Ulid; + +use self::stat_buffer::BufferedRpcQueryStats; #[derive(Debug, PartialEq, Eq)] pub enum StatType { @@ -39,8 +39,9 @@ pub enum StatType { Detailed, } -// Pub is needed for migration ... I could also write a second constructor for this if needed -/// TODO: better name? +pub type BackendRequests = Mutex>>; + +/// TODO: better name? RpcQueryStatBuilder? #[derive(Clone, Debug)] pub struct RpcQueryStats { pub authorization: Arc, @@ -49,8 +50,8 @@ pub struct RpcQueryStats { pub error_response: bool, pub request_bytes: u64, /// if backend_requests is 0, there was a cache_hit - // pub frontend_request: u64, - pub backend_requests: u64, + /// no need to track frontend_request on this. a RpcQueryStats always represents one frontend request + pub backend_rpcs_used: Vec>, pub response_bytes: u64, pub response_millis: u64, pub response_timestamp: i64, @@ -58,7 +59,7 @@ pub struct RpcQueryStats { pub credits_used: Decimal, } -#[derive(Clone, From, Hash, PartialEq, Eq)] +#[derive(Clone, Debug, From, Hash, PartialEq, Eq)] pub struct RpcQueryKey { /// unix epoch time /// for the time series db, this is (close to) the time that the response was sent @@ -181,22 +182,6 @@ impl RpcQueryStats { } } -#[derive(Default)] -pub struct BufferedRpcQueryStats { - pub frontend_requests: u64, - pub backend_requests: u64, - pub backend_retries: u64, - pub no_servers: u64, - pub cache_misses: u64, - pub cache_hits: u64, - pub sum_request_bytes: u64, - pub sum_response_bytes: u64, - pub sum_response_millis: u64, - pub sum_credits_used: Decimal, - /// Balance tells us the user's balance at this point in time - pub latest_balance: Decimal, -} - /// A stat that we aggregate and then store in a database. 
/// For now there is just one, but I think there might be others later #[derive(Debug, From)] @@ -204,34 +189,16 @@ pub enum AppStat { RpcQuery(RpcQueryStats), } -#[derive(From)] -pub struct SpawnedStatBuffer { - pub stat_sender: flume::Sender, - /// these handles are important and must be allowed to finish - pub background_handle: JoinHandle>, -} - -pub struct StatBuffer { - chain_id: u64, - db_conn: Option, - influxdb_client: Option, - tsdb_save_interval_seconds: u32, - rpc_secret_key_cache: - Option>, - db_save_interval_seconds: u32, - billing_period_seconds: i64, - global_timeseries_buffer: HashMap, - opt_in_timeseries_buffer: HashMap, - accounting_db_buffer: HashMap, - timestamp_precision: TimestampPrecision, -} - +// TODO: move to stat_buffer.rs? impl BufferedRpcQueryStats { fn add(&mut self, stat: RpcQueryStats) { // a stat always come from just 1 frontend request self.frontend_requests += 1; - if stat.backend_requests == 0 { + // TODO: is this always okay? is it true that each backend rpc will only be queried once per request? i think so + let num_backend_rpcs_used = stat.backend_rpcs_used.len() as u64; + + if num_backend_rpcs_used == 0 { // no backend request. cache hit! self.cache_hits += 1; } else { @@ -239,7 +206,7 @@ impl BufferedRpcQueryStats { self.cache_misses += 1; // a single frontend request might have multiple backend requests - self.backend_requests += stat.backend_requests; + self.backend_requests += num_backend_rpcs_used; } self.sum_request_bytes += stat.request_bytes; @@ -261,13 +228,21 @@ impl BufferedRpcQueryStats { chain_id: u64, db_conn: &DatabaseConnection, key: RpcQueryKey, + rpc_secret_key_cache: Option<&RpcSecretKeyCache>, ) -> anyhow::Result<()> { + anyhow::ensure!( + key.response_timestamp > 0, + "no response_timestamp! This is a bug! {:?} {:?}", + key, + self + ); + let period_datetime = Utc.timestamp_opt(key.response_timestamp, 0).unwrap(); // this is a lot of variables let accounting_entry = rpc_accounting_v2::ActiveModel { id: sea_orm::NotSet, - rpc_key_id: sea_orm::Set(key.rpc_secret_key_id.map(Into::into).unwrap_or_default()), + rpc_key_id: sea_orm::Set(key.rpc_secret_key_id.map(Into::into)), chain_id: sea_orm::Set(chain_id), period_datetime: sea_orm::Set(period_datetime), archive_needed: sea_orm::Set(key.archive_needed), @@ -360,7 +335,7 @@ impl BufferedRpcQueryStats { // Technicall there should always be a user ... still let's return "Ok(())" for now let sender_user_id: u64 = match sender_rpc_key { - Some(x) => x.user_id.into(), + Some(x) => x.user_id, // Return early if the User is not found, because then it is an anonymous user // Let's also issue a warning because obviously the RPC key should correspond to a user None => { @@ -390,12 +365,14 @@ impl BufferedRpcQueryStats { // Still subtract from the user in any case, // Modify the balance of the sender completely (in mysql, next to the stats) // In any case, add this to "spent" + // TODO! we need to do the math in mysql (like with `Expr::col` above). if we do the addition here, there is a race condition active_sender_balance.used_balance = - sea_orm::Set(sender_balance.used_balance + Decimal::from(self.sum_credits_used)); + sea_orm::Set(sender_balance.used_balance + self.sum_credits_used); // Also update the available balance + // TODO! 
this needs to be queried from the database let new_available_balance = max( - sender_balance.available_balance - Decimal::from(self.sum_credits_used), + sender_balance.available_balance - self.sum_credits_used, Decimal::from(0), ); active_sender_balance.available_balance = sea_orm::Set(new_available_balance); @@ -424,15 +401,26 @@ impl BufferedRpcQueryStats { ))?; // Downgrade a user to premium - out of funds if there's less than 10$ in the account, and if the user was premium before + // TODO: lets let them get under $1 + // TODO: instead of checking for a specific title, downgrade if the downgrade id is set to anything if new_available_balance < Decimal::from(10u64) && downgrade_user_role.title == "Premium" { + // TODO: we could do this outside the balance low block, but I think its fine. or better, update the cache if <$10 and downgrade if <$1 + if let Some(rpc_secret_key_cache) = rpc_secret_key_cache { + todo!("expire (or probably better to update) the user cache now that the balance is low"); + // actually i think we need to have 2 caches. otherwise users with 2 keys are going to have seperate caches + // 1. rpc_secret_key_id -> AuthorizationChecks (cuz we don't want to hit the db every time) + // 2. user_id -> Balance + } + // Only downgrade the user in local process memory, not elsewhere - // app.rpc_secret_key_cache- // let mut active_downgrade_user = downgrade_user.into_active_model(); // active_downgrade_user.user_tier_id = sea_orm::Set(downgrade_user_role.id); // active_downgrade_user.save(db_conn).await?; } + // TODO: + // Get the referee, and the referrer // (2) Look up the code that this user used. This is the referee table let referee_object = match referee::Entity::find() @@ -459,6 +447,7 @@ impl BufferedRpcQueryStats { { Some(x) => x, None => { + // TODO: warn seems too verbose for this. it should be fine for a user to not have a referall code, right? warn!( "No referrer with that referral code was found {:?}", referee_object @@ -487,6 +476,7 @@ impl BufferedRpcQueryStats { } }; + // TODO: don't clone on this. use the active_model later let mut active_sender_balance = sender_balance.clone().into_active_model(); let referrer_balance = match balance::Entity::find() .filter(balance::Column::UserId.eq(user_with_that_referral_code.user_id)) @@ -513,6 +503,7 @@ impl BufferedRpcQueryStats { { // (6) If the credits have not yet been applied to the referee, apply 10M credits / $100.00 USD worth of credits. // Make it into an active model, and add credits + // TODO! race condition here! we can't set. need to let the db do the math active_sender_balance.available_balance = sea_orm::Set(sender_balance.available_balance + Decimal::from(100)); // Also mark referral as "credits_applied_for_referee" @@ -528,8 +519,7 @@ impl BufferedRpcQueryStats { let mut active_referrer_balance = referrer_balance.clone().into_active_model(); // Add 10% referral fees ... 
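To make the balance arithmetic around this block concrete: the amounts are `Decimal`s (the `rust_decimal` type re-exported by sea-orm's prelude), the sender is debited the full `sum_credits_used` with the available balance floored at zero, and the referrer is credited a tenth of what the referee spent, which is what the next statement does. A tiny worked example with invented amounts:

    use rust_decimal::Decimal;
    use std::cmp::max;

    fn main() {
        // credits the referee spent during this aggregation period (invented amount)
        let sum_credits_used = Decimal::new(12345, 2); // 123.45

        // debit the sender, never letting the available balance go negative
        let sender_available = Decimal::new(10000, 2); // 100.00
        let new_sender_available = max(sender_available - sum_credits_used, Decimal::ZERO);
        assert_eq!(new_sender_available, Decimal::ZERO);

        // credit the referrer with 10% of what the referee spent
        let referrer_bonus = sum_credits_used / Decimal::from(10u64);
        assert_eq!(referrer_bonus, Decimal::new(12345, 3)); // 12.345
    }

As the surrounding TODOs note, computing these values in Rust and writing them back is racy once several proxies share the database; the intended fix is to push the addition and subtraction into the UPDATE statement itself.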
active_referrer_balance.available_balance = sea_orm::Set( - referrer_balance.available_balance - + Decimal::from(self.sum_credits_used / Decimal::from(10)), + referrer_balance.available_balance + self.sum_credits_used / Decimal::from(10), ); // Also record how much the current referrer has "provided" / "gifted" away active_referee.credits_applied_for_referrer = @@ -598,51 +588,80 @@ impl BufferedRpcQueryStats { } } -impl RpcQueryStats { - pub fn new( - method: Option, - authorization: Arc, - metadata: Arc, - response_bytes: usize, - ) -> Self { - // TODO: try_unwrap the metadata to be sure that all the stats for this request have been collected - // TODO: otherwise, i think the whole thing should be in a single lock that we can "reset" when a stat is created +impl TryFrom for RpcQueryStats { + type Error = Web3ProxyError; + + fn try_from(mut metadata: RequestMetadata) -> Result { + let mut authorization = metadata.authorization.take(); + + if authorization.is_none() { + authorization = Some(Arc::new(Authorization::internal(None)?)); + } + + let authorization = authorization.expect("Authorization will always be set"); let archive_request = metadata.archive_request.load(Ordering::Acquire); - let backend_requests = metadata.backend_requests.lock().len() as u64; - let request_bytes = metadata.request_bytes; - let error_response = metadata.error_response.load(Ordering::Acquire); - let response_millis = metadata.start_instant.elapsed().as_millis() as u64; - let response_bytes = response_bytes as u64; - // TODO: Gotta make the arithmetic here + // TODO: do this without cloning. we can take their vec + let backend_rpcs_used = metadata.backend_rpcs_used(); + + let request_bytes = metadata.request_bytes as u64; + let response_bytes = metadata.response_bytes.load(Ordering::Acquire); + + let mut error_response = metadata.error_response.load(Ordering::Acquire); + let mut response_millis = metadata.response_millis.load(atomic::Ordering::Acquire); + + let response_timestamp = match metadata.response_timestamp.load(atomic::Ordering::Acquire) { + 0 => { + // no response timestamp! + if !error_response { + // force error_response to true + // this can happen when a try operator escapes and metadata.add_response() isn't called + trace!( + "no response known, but no errors logged. investigate. {:?}", + metadata + ); + error_response = true; + } + + if response_millis == 0 { + // get something for millis even if it is a bit late + response_millis = metadata.start_instant.elapsed().as_millis() as u64 + } + + // no timestamp given. likely handling an error. 
set it to the current time + Utc::now().timestamp() + } + x => x, + }; + + let method = metadata.method.take(); - // TODO: Depending on the method, metadata and response bytes, pick a different number of credits used - // This can be a slightly more complex function as we ll - // TODO: Here, let's implement the formula let credits_used = Self::compute_cost( request_bytes, response_bytes, - backend_requests == 0, - &method, + backend_rpcs_used.is_empty(), + method.as_deref(), ); - let response_timestamp = Utc::now().timestamp(); - - Self { + let x = Self { authorization, archive_request, method, - backend_requests, + backend_rpcs_used, request_bytes, error_response, response_bytes, response_millis, response_timestamp, credits_used, - } - } + }; + Ok(x) + } +} + +impl RpcQueryStats { /// Compute cost per request /// All methods cost the same /// The number of bytes are based on input, and output bytes @@ -650,251 +669,39 @@ impl RpcQueryStats { request_bytes: u64, response_bytes: u64, cache_hit: bool, - _method: &Option, + method: Option<&str>, ) -> Decimal { - // TODO: Should make these lazy_static const? + // some methods should be free. there might be cases where method isn't set (though they should be uncommon) + // TODO: get this list from config (and add more to it) + if let Some(method) = method.as_ref() { + if ["eth_chainId"].contains(method) { + return 0.into(); + } + } + + // TODO: get cost_minimum, cost_free_bytes, cost_per_byte, cache_hit_divisor from config. each chain will be different // pays at least $0.000018 / credits per request let cost_minimum = Decimal::new(18, 6); + // 1kb is included on each call let cost_free_bytes = 1024; + // after that, we add cost per bytes, $0.000000006 / credits per byte + // amazon charges $.09/GB outbound + // but we also have to cover our RAM and expensive nics on the servers (haproxy/web3-proxy/blockchains) let cost_per_byte = Decimal::new(6, 9); let total_bytes = request_bytes + response_bytes; - let total_chargable_bytes = - Decimal::from(max(0, total_bytes as i64 - cost_free_bytes as i64)); - let out = cost_minimum + cost_per_byte * total_chargable_bytes; + let total_chargable_bytes = Decimal::from(total_bytes.saturating_sub(cost_free_bytes)); + + let mut cost = cost_minimum + cost_per_byte * total_chargable_bytes; + + // cache hits get a 50% discount if cache_hit { - out * Decimal::new(5, 1) - } else { - out + cost /= Decimal::from(2) } - } - /// Only used for migration from stats_v1 to stats_v2/v3 - pub fn modify_struct( - &mut self, - response_millis: u64, - response_timestamp: i64, - backend_requests: u64, - ) { - self.response_millis = response_millis; - self.response_timestamp = response_timestamp; - self.backend_requests = backend_requests; - } -} - -impl StatBuffer { - #[allow(clippy::too_many_arguments)] - pub fn try_spawn( - chain_id: u64, - bucket: String, - db_conn: Option, - influxdb_client: Option, - rpc_secret_key_cache: Option< - Cache, - >, - db_save_interval_seconds: u32, - tsdb_save_interval_seconds: u32, - billing_period_seconds: i64, - shutdown_receiver: broadcast::Receiver<()>, - ) -> anyhow::Result> { - if db_conn.is_none() && influxdb_client.is_none() { - return Ok(None); - } - - let (stat_sender, stat_receiver) = flume::unbounded(); - - let timestamp_precision = TimestampPrecision::Seconds; - let mut new = Self { - chain_id, - db_conn, - influxdb_client, - db_save_interval_seconds, - tsdb_save_interval_seconds, - rpc_secret_key_cache, - billing_period_seconds, - global_timeseries_buffer: Default::default(), - 
opt_in_timeseries_buffer: Default::default(), - accounting_db_buffer: Default::default(), - timestamp_precision, - }; - - // any errors inside this task will cause the application to exit - let handle = tokio::spawn(async move { - new.aggregate_and_save_loop(bucket, stat_receiver, shutdown_receiver) - .await - }); - - Ok(Some((stat_sender, handle).into())) - } - - async fn aggregate_and_save_loop( - &mut self, - bucket: String, - stat_receiver: flume::Receiver, - mut shutdown_receiver: broadcast::Receiver<()>, - ) -> anyhow::Result<()> { - let mut tsdb_save_interval = - interval(Duration::from_secs(self.tsdb_save_interval_seconds as u64)); - let mut db_save_interval = - interval(Duration::from_secs(self.db_save_interval_seconds as u64)); - - // TODO: Somewhere here we should probably be updating the balance of the user - // And also update the credits used etc. for the referred user - loop { - tokio::select! { - stat = stat_receiver.recv_async() => { - // info!("Received stat"); - // save the stat to a buffer - match stat { - Ok(AppStat::RpcQuery(stat)) => { - if self.influxdb_client.is_some() { - // TODO: round the timestamp at all? - - let global_timeseries_key = stat.global_timeseries_key(); - - self.global_timeseries_buffer.entry(global_timeseries_key).or_default().add(stat.clone()); - - if let Some(opt_in_timeseries_key) = stat.opt_in_timeseries_key() { - self.opt_in_timeseries_buffer.entry(opt_in_timeseries_key).or_default().add(stat.clone()); - } - } - - if self.db_conn.is_some() { - self.accounting_db_buffer.entry(stat.accounting_key(self.billing_period_seconds)).or_default().add(stat); - } - } - Err(err) => { - error!("error receiving stat: {:?}", err); - break; - } - } - } - _ = db_save_interval.tick() => { - // info!("DB save internal tick"); - let count = self.save_relational_stats().await; - if count > 0 { - trace!("Saved {} stats to the relational db", count); - } - } - _ = tsdb_save_interval.tick() => { - // info!("TSDB save internal tick"); - let count = self.save_tsdb_stats(&bucket).await; - if count > 0 { - trace!("Saved {} stats to the tsdb", count); - } - } - x = shutdown_receiver.recv() => { - info!("shutdown signal ---"); - match x { - Ok(_) => { - info!("stat_loop shutting down"); - }, - Err(err) => error!("stat_loop shutdown receiver err={:?}", err), - } - break; - } - } - } - - let saved_relational = self.save_relational_stats().await; - - info!("saved {} pending relational stats", saved_relational); - - let saved_tsdb = self.save_tsdb_stats(&bucket).await; - - info!("saved {} pending tsdb stats", saved_tsdb); - - info!("accounting and stat save loop complete"); - - Ok(()) - } - - async fn save_relational_stats(&mut self) -> usize { - let mut count = 0; - - if let Some(db_conn) = self.db_conn.as_ref() { - count = self.accounting_db_buffer.len(); - for (key, stat) in self.accounting_db_buffer.drain() { - // TODO: batch saves - // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now - if let Err(err) = stat.save_db(self.chain_id, db_conn, key).await { - error!("unable to save accounting entry! 
err={:?}", err); - }; - } - } - - count - } - - // TODO: bucket should be an enum so that we don't risk typos - async fn save_tsdb_stats(&mut self, bucket: &str) -> usize { - let mut count = 0; - - if let Some(influxdb_client) = self.influxdb_client.as_ref() { - // TODO: use stream::iter properly to avoid allocating this Vec - let mut points = vec![]; - - for (key, stat) in self.global_timeseries_buffer.drain() { - // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now - match stat - .build_timeseries_point("global_proxy", self.chain_id, key) - .await - { - Ok(point) => { - points.push(point); - } - Err(err) => { - error!("unable to build global stat! err={:?}", err); - } - }; - } - - for (key, stat) in self.opt_in_timeseries_buffer.drain() { - // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now - match stat - .build_timeseries_point("opt_in_proxy", self.chain_id, key) - .await - { - Ok(point) => { - points.push(point); - } - Err(err) => { - // TODO: if this errors, we throw away some of the pending stats! we should probably buffer them somewhere to be tried again - error!("unable to build opt-in stat! err={:?}", err); - } - }; - } - - count = points.len(); - - if count > 0 { - // TODO: put max_batch_size in config? - // TODO: i think the real limit is the byte size of the http request. so, a simple line count won't work very well - let max_batch_size = 100; - - let mut num_left = count; - - while num_left > 0 { - let batch_size = num_left.min(max_batch_size); - - let p = points.split_off(batch_size); - - num_left -= batch_size; - - if let Err(err) = influxdb_client - .write_with_precision(bucket, stream::iter(p), self.timestamp_precision) - .await - { - // TODO: if this errors, we throw away some of the pending stats! we should probably buffer them somewhere to be tried again - error!("unable to save {} tsdb stats! 
err={:?}", batch_size, err); - } - } - } - } - - count + cost } } diff --git a/web3_proxy/src/stats/stat_buffer.rs b/web3_proxy/src/stats/stat_buffer.rs new file mode 100644 index 00000000..aaca71cd --- /dev/null +++ b/web3_proxy/src/stats/stat_buffer.rs @@ -0,0 +1,269 @@ +use super::{AppStat, RpcQueryKey}; +use crate::app::RpcSecretKeyCache; +use derive_more::From; +use futures::stream; +use hashbrown::HashMap; +use influxdb2::api::write::TimestampPrecision; +use log::{error, info, trace}; +use migration::sea_orm::prelude::Decimal; +use migration::sea_orm::DatabaseConnection; +use std::time::Duration; +use tokio::sync::broadcast; +use tokio::task::JoinHandle; +use tokio::time::interval; + +#[derive(Debug, Default)] +pub struct BufferedRpcQueryStats { + pub frontend_requests: u64, + pub backend_requests: u64, + pub backend_retries: u64, + pub no_servers: u64, + pub cache_misses: u64, + pub cache_hits: u64, + pub sum_request_bytes: u64, + pub sum_response_bytes: u64, + pub sum_response_millis: u64, + pub sum_credits_used: Decimal, + /// Balance tells us the user's balance at this point in time + pub latest_balance: Decimal, +} + +#[derive(From)] +pub struct SpawnedStatBuffer { + pub stat_sender: flume::Sender, + /// these handles are important and must be allowed to finish + pub background_handle: JoinHandle>, +} +pub struct StatBuffer { + accounting_db_buffer: HashMap, + billing_period_seconds: i64, + chain_id: u64, + db_conn: Option, + db_save_interval_seconds: u32, + global_timeseries_buffer: HashMap, + influxdb_client: Option, + opt_in_timeseries_buffer: HashMap, + rpc_secret_key_cache: Option, + timestamp_precision: TimestampPrecision, + tsdb_save_interval_seconds: u32, +} + +impl StatBuffer { + #[allow(clippy::too_many_arguments)] + pub fn try_spawn( + billing_period_seconds: i64, + bucket: String, + chain_id: u64, + db_conn: Option, + db_save_interval_seconds: u32, + influxdb_client: Option, + rpc_secret_key_cache: Option, + shutdown_receiver: broadcast::Receiver<()>, + tsdb_save_interval_seconds: u32, + ) -> anyhow::Result> { + if db_conn.is_none() && influxdb_client.is_none() { + return Ok(None); + } + + let (stat_sender, stat_receiver) = flume::unbounded(); + + let timestamp_precision = TimestampPrecision::Seconds; + let mut new = Self { + accounting_db_buffer: Default::default(), + billing_period_seconds, + chain_id, + db_conn, + db_save_interval_seconds, + global_timeseries_buffer: Default::default(), + influxdb_client, + opt_in_timeseries_buffer: Default::default(), + rpc_secret_key_cache, + timestamp_precision, + tsdb_save_interval_seconds, + }; + + // any errors inside this task will cause the application to exit + let handle = tokio::spawn(async move { + new.aggregate_and_save_loop(bucket, stat_receiver, shutdown_receiver) + .await + }); + + Ok(Some((stat_sender, handle).into())) + } + + async fn aggregate_and_save_loop( + &mut self, + bucket: String, + stat_receiver: flume::Receiver, + mut shutdown_receiver: broadcast::Receiver<()>, + ) -> anyhow::Result<()> { + let mut tsdb_save_interval = + interval(Duration::from_secs(self.tsdb_save_interval_seconds as u64)); + let mut db_save_interval = + interval(Duration::from_secs(self.db_save_interval_seconds as u64)); + + // TODO: Somewhere here we should probably be updating the balance of the user + // And also update the credits used etc. for the referred user + + loop { + tokio::select! 
{ + stat = stat_receiver.recv_async() => { + // info!("Received stat"); + // save the stat to a buffer + match stat { + Ok(AppStat::RpcQuery(stat)) => { + if self.influxdb_client.is_some() { + // TODO: round the timestamp at all? + + let global_timeseries_key = stat.global_timeseries_key(); + + self.global_timeseries_buffer.entry(global_timeseries_key).or_default().add(stat.clone()); + + if let Some(opt_in_timeseries_key) = stat.opt_in_timeseries_key() { + self.opt_in_timeseries_buffer.entry(opt_in_timeseries_key).or_default().add(stat.clone()); + } + } + + if self.db_conn.is_some() { + self.accounting_db_buffer.entry(stat.accounting_key(self.billing_period_seconds)).or_default().add(stat); + } + } + Err(err) => { + error!("error receiving stat: {:?}", err); + break; + } + } + } + _ = db_save_interval.tick() => { + // info!("DB save internal tick"); + let count = self.save_relational_stats().await; + if count > 0 { + trace!("Saved {} stats to the relational db", count); + } + } + _ = tsdb_save_interval.tick() => { + // info!("TSDB save internal tick"); + let count = self.save_tsdb_stats(&bucket).await; + if count > 0 { + trace!("Saved {} stats to the tsdb", count); + } + } + x = shutdown_receiver.recv() => { + match x { + Ok(_) => { + info!("stat_loop shutting down"); + }, + Err(err) => error!("stat_loop shutdown receiver err={:?}", err), + } + break; + } + } + } + + let saved_relational = self.save_relational_stats().await; + + info!("saved {} pending relational stat(s)", saved_relational); + + let saved_tsdb = self.save_tsdb_stats(&bucket).await; + + info!("saved {} pending tsdb stat(s)", saved_tsdb); + + info!("accounting and stat save loop complete"); + + Ok(()) + } + + async fn save_relational_stats(&mut self) -> usize { + let mut count = 0; + + if let Some(db_conn) = self.db_conn.as_ref() { + count = self.accounting_db_buffer.len(); + for (key, stat) in self.accounting_db_buffer.drain() { + // TODO: batch saves + // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now + if let Err(err) = stat + .save_db( + self.chain_id, + db_conn, + key, + self.rpc_secret_key_cache.as_ref(), + ) + .await + { + error!("unable to save accounting entry! err={:?}", err); + }; + } + } + + count + } + + // TODO: bucket should be an enum so that we don't risk typos + async fn save_tsdb_stats(&mut self, bucket: &str) -> usize { + let mut count = 0; + + if let Some(influxdb_client) = self.influxdb_client.as_ref() { + // TODO: use stream::iter properly to avoid allocating this Vec + let mut points = vec![]; + + for (key, stat) in self.global_timeseries_buffer.drain() { + // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now + match stat + .build_timeseries_point("global_proxy", self.chain_id, key) + .await + { + Ok(point) => { + points.push(point); + } + Err(err) => { + error!("unable to build global stat! err={:?}", err); + } + }; + } + + for (key, stat) in self.opt_in_timeseries_buffer.drain() { + // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now + match stat + .build_timeseries_point("opt_in_proxy", self.chain_id, key) + .await + { + Ok(point) => { + points.push(point); + } + Err(err) => { + // TODO: if this errors, we throw away some of the pending stats! we should probably buffer them somewhere to be tried again + error!("unable to build opt-in stat! 
err={:?}", err); + } + }; + } + + count = points.len(); + + if count > 0 { + // TODO: put max_batch_size in config? + // TODO: i think the real limit is the byte size of the http request. so, a simple line count won't work very well + let max_batch_size = 100; + + let mut num_left = count; + + while num_left > 0 { + let batch_size = num_left.min(max_batch_size); + + let p = points.split_off(batch_size); + + num_left -= batch_size; + + if let Err(err) = influxdb_client + .write_with_precision(bucket, stream::iter(p), self.timestamp_precision) + .await + { + // TODO: if this errors, we throw away some of the pending stats! we should probably buffer them somewhere to be tried again + error!("unable to save {} tsdb stats! err={:?}", batch_size, err); + } + } + } + } + + count + } +}