ingested changes from upstream main. added another cli admin endpoint

yenicelik 2023-01-29 23:27:34 +01:00
commit 04687b3392
45 changed files with 4625 additions and 2142 deletions

Cargo.lock (generated): 1446 lines changed. File diff suppressed because it is too large.

@ -9,9 +9,7 @@ members = [
]
[profile.release]
# we leave debug = true on so that sentry can give us line numbers
# `debug = true` so that sentry can give us line numbers
debug = true
# TODO: enable lto (and maybe other things proven with benchmarks) once rapid development is done
#lto = true
# TODO: we can't do panic = abort because the websockets disconnect by panicking sometimes
# spend longer compiling for a slightly faster binary
codegen-units = 1

@ -8,12 +8,14 @@ COPY . .
RUN --mount=type=cache,target=/usr/local/cargo/registry \
--mount=type=cache,target=/usr/src/web3_proxy/target \
cargo test &&\
cargo install --locked --root /opt/bin --path ./web3_proxy
cargo install --locked --no-default-features --root /opt/bin --path ./web3_proxy
FROM debian:bullseye-slim
COPY --from=builder /opt/bin/* /usr/local/bin/
ENTRYPOINT ["web3_proxy"]
ENTRYPOINT ["web3_proxy_cli"]
CMD [ "--config", "/web3-proxy.toml", "proxyd" ]
# TODO: lower log level when done with prototyping
ENV RUST_LOG "web3_proxy=debug"
ENV RUST_LOG "warn,web3_proxy=debug,web3_proxy_cli=debug"

@ -37,7 +37,7 @@ Options:
Start the server with the defaults (listen on `http://localhost:8544` and use `./config/development.toml`, which uses the database and cache running under docker and proxies to a bunch of public nodes):
```
cargo run --release
cargo run --release -- daemon
```
## Common commands
@ -45,7 +45,7 @@ cargo run --release
Create a user:
```
cargo run --bin web3_proxy_cli -- --db-url "$YOUR_DB_URL" create_user --address "$USER_ADDRESS_0x"
cargo run -- --db-url "$YOUR_DB_URL" create_user --address "$USER_ADDRESS_0x"
```
Check that the proxy is working:
@ -104,7 +104,7 @@ web3_proxy_cli --config ... change_user_tier_by_key "$RPC_ULID_KEY_FROM_PREV_COM
Health check 3 servers and error if the first one doesn't match the others.
```
web3_proxy_cli https://eth.llamarpc.com/ https://rpc.ankr.com/eth https://cloudflare-eth.com
web3_proxy_cli health_compass https://eth.llamarpc.com/ https://rpc.ankr.com/eth https://cloudflare-eth.com
```
## Adding new database tables

TODO.md: 32 lines changed.

@ -300,6 +300,32 @@ These are not yet ordered. There might be duplicates. We might not actually need
- [x] if private txs are disabled, only send transactions to some of our servers. we were DOSing ourselves with transactions and slowing down sync
- [x] retry if we get "the method X is not available"
- [x] remove weight. we don't use it anymore. tiers are what we use now
- [x] make deadlock feature optional
- [x] standalone healthcheck daemon (sentryd)
- [x] status page should show version
- [x] combine the proxy and cli into one bin
- [x] improve rate limiting on websockets
- [x] retry another server if we get a jsonrpc response error about rate limits
- [x] major refactor to only use backup servers when absolutely necessary
- [x] remove allowed lag
- [x] configurable gas buffer. default to the larger of 25k or 25% on polygon to work around erigon bug
- [x] public is 3900, but free is 360. free should be at least 3900 but probably more
- [x] add --max-wait to wait_for_sync
- [x] add automatic compare urls to wait_for_sync
- [x] send panics to pagerduty
- [x] enable lto on release builds
- [x] less logs for backup servers
- [x] use channels instead of arcswap
- this will let us easily wait for a new head or a new synced connection
- [x] broadcast transactions to more servers
- [x] send sentryd errors to pagerduty
- [x] improve handling of unknown methods
- [x] don't send pagerduty alerts for websocket panics
- [x] improve waiting for sync when rate limited
- [x] improve pager duty errors for smarter deduping
- [x] add create_key cli command
- [-] proxy mode for benchmarking all backends
- [-] proxy mode for sending to multiple backends
- [-] let users choose a % of reverts to log (or maybe x/second). someone like curve logging all reverts will be a BIG database very quickly
- this must be opt-in and spawned in the background since it will slow things down and will make their calls less private
- [ ] automatic pruning of old revert logs once too many are collected
@ -323,7 +349,7 @@ These are not yet ordered. There might be duplicates. We might not actually need
- [ ] `stat delay` script
- query database for newest stat
- [ ] period_datetime should always be :00. right now it depends on start time
- [ ] two servers running will confuse rpc_accounting!
- [ ] we have our hard rate limiter set up with a period of 60. but most providers have period of 1
- [ ] two servers running will confuse rpc_accounting!
- it won't happen with users often because they should be sticky to one proxy, but unauthenticated users will definitely hit this
- one option: we need the insert to be an upsert, but how do we merge histograms?
- [ ] don't use systemtime. use chrono
@ -508,7 +534,8 @@ in another repo: event subscriber
- [ ] if the call is something simple like "symbol" or "decimals", cache that too. though i think this could bite us.
- [ ] add a subscription that returns the head block number and hash but nothing else
- [ ] if chain split detected, what should we do? don't send transactions?
- [ ] archive check works well for local servers, but public nodes (especially on other chains) seem to give unreliable results. likely because of load balancers. maybe have a "max block data limit"
- [ ] archive check works well for local servers, but public nodes (especially on other chains) seem to give unreliable results. likely because of load balancers.
- [x] configurable block data limit until better checks
- [ ] https://docs.rs/derive_builder/latest/derive_builder/
- [ ] Detect orphaned transactions
- [ ] https://crates.io/crates/reqwest-middleware easy retry with exponential back off
@ -578,7 +605,6 @@ in another repo: event subscriber
- [ ] sentry profiling
- [ ] support alchemy_minedTransactions
- [ ] debug print of user::Model's address is a big vec of numbers. make that hex somehow
- [ ] should we combine the proxy and cli into one bin?
- [ ] make it so you can put a string like "LN arbitrum" into the create_user script, and have it automatically turn it into 0x4c4e20617262697472756d000000000000000000.
- [ ] if --address not given, use the --description
- [ ] if it is too long, (the last 4 bytes must be zero), give an error so descriptions like this stand out

@ -11,4 +11,4 @@ anyhow = "1.0.68"
hashbrown = "0.13.2"
log = "0.4.17"
moka = { version = "0.9.6", default-features = false, features = ["future"] }
tokio = "1.24.1"
tokio = "1.24.2"

@ -4,7 +4,7 @@ services:
build: .
init: true
restart: unless-stopped
command: --config /config.toml --workers 16
command: --config /config.toml --workers 16 proxyd
# rust's tokio crate expects a SIGINT https://tokio.rs/tokio/topics/shutdown
stop_signal: SIGINT
environment:

@ -68,7 +68,7 @@ services:
extends:
file: docker-compose.common.yml
service: web3-proxy
command: --config /config.toml --workers 48
command: --config /config.toml --workers 48 proxyd
volumes:
- ./config/production-eth.toml:/config.toml
- ./data/scratch:/scratch

@ -23,6 +23,22 @@ services:
volumes:
- ./data/dev_mysql:/var/lib/mysql
# influxdb for stats
dev-influxdb:
image: influxdb:2.6.1-alpine
environment:
DOCKER_INFLUXDB_INIT_MODE: setup
DOCKER_INFLUXDB_INIT_USERNAME: dev_web3_proxy
DOCKER_INFLUXDB_INIT_PASSWORD: dev_web3_proxy
DOCKER_INFLUXDB_INIT_ORG: dev_org
DOCKER_INFLUXDB_INIT_BUCKET: dev_web3_proxy
DOCKER_INFLUXDB_INIT_ADMIN_TOKEN: dev_web3_proxy_auth_token
ports:
- 127.0.0.1:8086:8086
volumes:
- ./data/dev_influxdb/data:/var/lib/influxdb2
- ./data/dev_influxdb/config:/etc/influxdb2
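Once `dev-influxdb` is up, it can be reached on the published port from the host. A minimal sketch of a reachability check in Rust (the `/health` endpoint is part of InfluxDB 2.x; this helper is not part of the commit):
```rust
// assumes reqwest and tokio are available; dev-only convenience, not in this repo
#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // 127.0.0.1:8086 matches the `ports:` mapping of the dev-influxdb service above
    let status = reqwest::get("http://127.0.0.1:8086/health").await?.status();
    println!("dev-influxdb health: {status}");
    Ok(())
}
```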
# volatile redis for storing rate limits
dev-vredis:
extends:

@ -1,6 +1,6 @@
[package]
name = "entities"
version = "0.12.0"
version = "0.13.0"
edition = "2021"
[lib]
@ -10,7 +10,7 @@ path = "src/mod.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
sea-orm = "0.10.6"
sea-orm = "0.10.7"
serde = "1.0.152"
uuid = "1.2.2"
ethers = "1.0.2"

@ -1,6 +1,6 @@
[package]
name = "migration"
version = "0.12.0"
version = "0.13.0"
edition = "2021"
publish = false
@ -9,10 +9,10 @@ name = "migration"
path = "src/lib.rs"
[dependencies]
tokio = { version = "1.24.1", features = ["full", "tracing"] }
tokio = { version = "1.24.2", features = ["full", "tracing"] }
[dependencies.sea-orm-migration]
version = "0.10.6"
version = "0.10.7"
features = [
# Enable at least one `ASYNC_RUNTIME` and `DATABASE_DRIVER` feature if you want to run migration via CLI.
# View the list of supported features at https://www.sea-ql.org/SeaORM/docs/install-and-config/database-and-async-runtime.

@ -2,7 +2,7 @@
- Generate a new migration file
```sh
cargo run -- migrate generate MIGRATION_NAME
cargo run -- generate MIGRATION_NAME
```
- Apply all pending migrations
```sh

@ -13,6 +13,7 @@ mod m20221108_200345_save_anon_stats;
mod m20221211_124002_request_method_privacy;
mod m20221213_134158_move_login_into_database;
mod m20230117_191358_admin_table;
mod m20230119_204135_better_free_tier;
pub struct Migrator;
@ -33,6 +34,7 @@ impl MigratorTrait for Migrator {
Box::new(m20221211_124002_request_method_privacy::Migration),
Box::new(m20221213_134158_move_login_into_database::Migration),
Box::new(m20230117_191358_admin_table::Migration),
Box::new(m20230119_204135_better_free_tier::Migration),
]
}
}

@ -0,0 +1,39 @@
//! Increase requests per minute for the free tier to be better than our public tier (which has 3900/min)
use sea_orm_migration::{prelude::*, sea_orm::ConnectionTrait};
#[derive(DeriveMigrationName)]
pub struct Migration;
#[async_trait::async_trait]
impl MigrationTrait for Migration {
async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
let db_conn = manager.get_connection();
let db_backend = manager.get_database_backend();
let update_free = Query::update()
.table(UserTier::Table)
.value(UserTier::MaxRequestsPerPeriod, 6000)
.and_where(Expr::col(UserTier::Title).eq("Free"))
.limit(1)
.to_owned();
let x = db_backend.build(&update_free);
let rows_affected = db_conn.execute(x).await?.rows_affected();
assert_eq!(rows_affected, 1, "unable to update free tier");
Ok(())
}
async fn down(&self, _manager: &SchemaManager) -> Result<(), DbErr> {
todo!();
}
}
#[derive(Iden)]
enum UserTier {
Table,
Title,
MaxRequestsPerPeriod,
}
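The down migration above is left as `todo!()`. If a revert is ever needed, a minimal sketch that drops the free tier back down could look like the following; the previous limit of 360 requests per period is an assumption taken from the TODO notes, not from this migration:
```rust
// hypothetical replacement for the todo!() in the MigrationTrait impl above
async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
    let db_conn = manager.get_connection();
    let db_backend = manager.get_database_backend();

    // 360 req/period is assumed from "public is 3900, but free is 360" in TODO.md
    let revert_free = Query::update()
        .table(UserTier::Table)
        .value(UserTier::MaxRequestsPerPeriod, 360)
        .and_where(Expr::col(UserTier::Title).eq("Free"))
        .limit(1)
        .to_owned();

    db_conn.execute(db_backend.build(&revert_free)).await?;

    Ok(())
}
```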

@ -7,4 +7,4 @@ edition = "2021"
[dependencies]
anyhow = "1.0.68"
deadpool-redis = { version = "0.11.1", features = ["rt_tokio_1", "serde"] }
tokio = "1.24.1"
tokio = "1.24.2"

@ -1,8 +1,8 @@
[package]
name = "web3_proxy"
version = "0.12.0"
version = "0.13.0"
edition = "2021"
default-run = "web3_proxy"
default-run = "web3_proxy_cli"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@ -19,55 +19,57 @@ migration = { path = "../migration" }
redis-rate-limiter = { path = "../redis-rate-limiter" }
thread-fast-rng = { path = "../thread-fast-rng" }
anyhow = { version = "1.0.68", features = ["backtrace"] }
arc-swap = "1.6.0"
argh = "0.1.10"
axum = { version = "0.6.2", features = ["headers", "ws"] }
axum-client-ip = "0.3.1"
axum-macros = "0.3.1"
# TODO: regex has several "perf" features that we might want to use
# TODO: make sure this uuid version matches sea-orm. PR to put this in their prelude
# TODO: import num_traits from sea-orm so we always have the same version
# TODO: import chrono from sea-orm so we always have the same version
# TODO: make sure this time version matches siwe. PR to put this in their prelude
anyhow = { version = "1.0.68", features = ["backtrace"] }
argh = "0.1.10"
axum = { version = "0.6.4", features = ["headers", "ws"] }
axum-client-ip = "0.3.1"
axum-macros = "0.3.2"
chrono = "0.4.23"
counter = "0.5.7"
derive_more = "0.99.17"
dotenv = "0.15.0"
ethers = { version = "1.0.2", default-features = false, features = ["rustls", "ws"] }
env_logger = "0.10.0"
ethers = { version = "1.0.2", default-features = false, features = ["rustls", "ws"] }
fdlimit = "0.2.1"
flume = "0.10.14"
futures = { version = "0.3.25", features = ["thread-pool"] }
gethostname = "0.4.1"
glob = "0.3.1"
handlebars = "4.3.6"
hashbrown = { version = "0.13.2", features = ["serde"] }
hdrhistogram = "7.5.2"
http = "0.2.8"
ipnet = "2.7.1"
itertools = "0.10.5"
log = "0.4.17"
metered = { version = "0.9.0", features = ["serialize"] }
moka = { version = "0.9.6", default-features = false, features = ["future"] }
notify = "5.0.0"
num = "0.4.0"
# TODO: import num_traits from sea-orm so we always have the same version
num-traits = "0.2.15"
pagerduty-rs = { version = "0.1.6", default-features = false, features = ["async", "rustls", "sync"] }
parking_lot = { version = "0.12.1", features = ["arc_lock"] }
proctitle = "0.1.1"
# TODO: regex has several "perf" features that we might want to use
regex = "1.7.1"
reqwest = { version = "0.11.13", default-features = false, features = ["json", "tokio-rustls"] }
handlebars = "4.3.6"
reqwest = { version = "0.11.14", default-features = false, features = ["json", "tokio-rustls"] }
rustc-hash = "1.1.0"
siwe = "0.5.0"
sentry = { version = "0.29.1", default-features = false, features = ["backtrace", "contexts", "panic", "anyhow", "reqwest", "rustls", "log", "sentry-log"] }
sentry = { version = "0.29.2", default-features = false, features = ["backtrace", "contexts", "panic", "anyhow", "reqwest", "rustls", "log", "sentry-log"] }
serde = { version = "1.0.152", features = [] }
serde_json = { version = "1.0.91", default-features = false, features = ["alloc", "raw_value"] }
serde_prometheus = "0.1.6"
# TODO: make sure this time version matches siwe. PR to put this in their prelude
siwe = "0.5.0"
time = "0.3.17"
tokio = { version = "1.24.1", features = ["full"] }
# TODO: make sure this uuid version matches sea-orm. PR to put this in their prelude
tokio = { version = "1.24.2", features = ["full"] }
tokio-stream = { version = "0.1.11", features = ["sync"] }
toml = "0.5.10"
toml = "0.6.0"
tower = "0.4.13"
tower-http = { version = "0.3.5", features = ["cors", "sensitive-headers"] }
ulid = { version = "1.0.0", features = ["serde"] }
url = "2.3.1"
uuid = "1.2.2"
itertools = "0.10.5"
glob = "0.3.1"

@ -4,8 +4,9 @@ mod ws;
use crate::app_stats::{ProxyResponseStat, StatEmitter, Web3ProxyStat};
use crate::block_number::{block_needed, BlockNeeded};
use crate::config::{AppConfig, TopConfig};
use crate::frontend::authorization::{Authorization, RequestMetadata};
use crate::frontend::authorization::{Authorization, RequestMetadata, RpcSecretKey};
use crate::frontend::errors::FrontendErrorResponse;
use crate::frontend::rpc_proxy_ws::ProxyMode;
use crate::jsonrpc::{
JsonRpcForwardedResponse, JsonRpcForwardedResponseEnum, JsonRpcRequest, JsonRpcRequestEnum,
};
@ -24,6 +25,7 @@ use entities::sea_orm_active_enums::LogLevel;
use entities::user;
use ethers::core::utils::keccak256;
use ethers::prelude::{Address, Block, Bytes, Transaction, TxHash, H256, U64};
use ethers::types::U256;
use ethers::utils::rlp::{Decodable, Rlp};
use futures::future::join_all;
use futures::stream::{FuturesUnordered, StreamExt};
@ -55,11 +57,12 @@ use tokio::time::{sleep, timeout};
use ulid::Ulid;
// TODO: make this customizable?
// TODO: include GIT_REF in here. i had trouble getting https://docs.rs/vergen/latest/vergen/ to work with a workspace. also .git is in .dockerignore
pub static APP_USER_AGENT: &str = concat!(
"satoshiandkin/",
"llamanodes_",
env!("CARGO_PKG_NAME"),
"/",
env!("CARGO_PKG_VERSION"),
"/v",
env!("CARGO_PKG_VERSION")
);
/// TODO: allow customizing the request period?
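With the crate name and the 0.13.0 version bump from the Cargo.toml diff above, the rebuilt constant expands to a single string; a quick hypothetical check (not part of this commit):
```rust
#[test]
fn app_user_agent_is_versioned() {
    // CARGO_PKG_NAME = "web3_proxy", CARGO_PKG_VERSION = "0.13.0" in this commit
    assert_eq!(APP_USER_AGENT, "llamanodes_web3_proxy/v0.13.0");
}
```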
@ -134,12 +137,14 @@ pub type AnyhowJoinHandle<T> = JoinHandle<anyhow::Result<T>>;
#[derive(Clone, Debug, Default, From)]
pub struct AuthorizationChecks {
/// database id of the primary user.
/// database id of the primary user. 0 if anon
/// TODO: do we need this? its on the authorization so probably not
pub user_id: u64,
/// the key used (if any)
pub rpc_secret_key: Option<RpcSecretKey>,
/// database id of the rpc key
/// if this is None, then this request is being rate limited by ip
pub rpc_key_id: Option<NonZeroU64>,
pub rpc_secret_key_id: Option<NonZeroU64>,
/// if None, allow unlimited queries. inherited from the user_tier
pub max_requests_per_period: Option<u64>,
// if None, allow unlimited concurrent requests. inherited from the user_tier
@ -183,10 +188,9 @@ pub struct Web3ProxyApp {
response_cache: ResponseCache,
// don't drop this or the sender will stop working
// TODO: broadcast channel instead?
head_block_receiver: watch::Receiver<ArcBlock>,
watch_consensus_head_receiver: watch::Receiver<ArcBlock>,
pending_tx_sender: broadcast::Sender<TxStatus>,
pub config: AppConfig,
pub allowed_lag: u64,
pub db_conn: Option<sea_orm::DatabaseConnection>,
pub db_replica: Option<DatabaseReplica>,
/// prometheus metrics
@ -269,18 +273,14 @@ pub async fn drop_migration_lock(db_conn: &DatabaseConnection) -> Result<(), DbE
Ok(())
}
/// Connect to the database and run migrations
pub async fn get_migrated_db(
db_url: String,
min_connections: u32,
max_connections: u32,
) -> anyhow::Result<DatabaseConnection> {
// TODO: this seems to fail silently
let db_conn = get_db(db_url, min_connections, max_connections).await?;
/// Be super careful with override_existing_lock! It is very important that only one process is running the migrations at a time!
pub async fn migrate_db(
db_conn: &DatabaseConnection,
override_existing_lock: bool,
) -> Result<(), DbErr> {
let db_backend = db_conn.get_database_backend();
// TODO: put the timestamp into this?
// TODO: put the timestamp and hostname into this as columns?
let create_lock_statment = db_backend.build(
Table::create()
.table(Alias::new("migration_lock"))
@ -290,18 +290,24 @@ pub async fn get_migrated_db(
loop {
if Migrator::get_pending_migrations(&db_conn).await?.is_empty() {
info!("no migrations to apply");
return Ok(db_conn);
return Ok(());
}
// there are migrations to apply
// acquire a lock
if let Err(err) = db_conn.execute(create_lock_statment.clone()).await {
debug!("Unable to acquire lock. err={:?}", err);
if override_existing_lock {
warn!("OVERRIDING EXISTING LOCK in 10 seconds! ctrl+c now if other migrations are actually running!");
// TODO: exponential backoff with jitter
sleep(Duration::from_secs(1)).await;
sleep(Duration::from_secs(10)).await
} else {
debug!("Unable to acquire lock. if you are positive no migration is running, run \"web3_proxy_cli drop_migration_lock\". err={:?}", err);
continue;
// TODO: exponential backoff with jitter?
sleep(Duration::from_secs(1)).await;
continue;
}
}
debug!("migration lock acquired");
@ -314,7 +320,19 @@ pub async fn get_migrated_db(
drop_migration_lock(&db_conn).await?;
// return if migrations erred
migration_result?;
migration_result
}
/// Connect to the database and run migrations
pub async fn get_migrated_db(
db_url: String,
min_connections: u32,
max_connections: u32,
) -> Result<DatabaseConnection, DbErr> {
// TODO: this seems to fail silently
let db_conn = get_db(db_url, min_connections, max_connections).await?;
migrate_db(&db_conn, false).await?;
Ok(db_conn)
}
@ -515,7 +533,8 @@ impl Web3ProxyApp {
};
// TODO: i don't like doing Block::default here! Change this to "None"?
let (head_block_sender, head_block_receiver) = watch::channel(Arc::new(Block::default()));
let (watch_consensus_head_sender, watch_consensus_head_receiver) =
watch::channel(Arc::new(Block::default()));
// TODO: will one receiver lagging be okay? how big should this be?
let (pending_tx_sender, pending_tx_receiver) = broadcast::channel(256);
@ -552,7 +571,7 @@ impl Web3ProxyApp {
http_client.clone(),
vredis_pool.clone(),
block_map.clone(),
Some(head_block_sender),
Some(watch_consensus_head_sender),
top_config.app.min_sum_soft_limit,
top_config.app.min_synced_rpcs,
Some(pending_tx_sender.clone()),
@ -580,6 +599,8 @@ impl Web3ProxyApp {
vredis_pool.clone(),
block_map,
// subscribing to new heads here won't work well. if they are fast, they might be ahead of balanced_rpcs
// they also often have low rate limits
// however, they are well connected to miners/validators. so maybe using them as a safety check would be good
None,
0,
0,
@ -683,24 +704,12 @@ impl Web3ProxyApp {
.time_to_idle(Duration::from_secs(120))
.build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default());
// TODO: get this out of the toml instead
let allowed_lag = match top_config.app.chain_id {
1 => 60,
137 => 10,
250 => 10,
_ => {
warn!("defaulting allowed lag to 60");
60
}
};
let app = Self {
config: top_config.app,
allowed_lag,
balanced_rpcs,
private_rpcs,
response_cache,
head_block_receiver,
watch_consensus_head_receiver,
pending_tx_sender,
pending_transactions,
frontend_ip_rate_limiter,
@ -723,6 +732,10 @@ impl Web3ProxyApp {
Ok((app, cancellable_handles, important_background_handles).into())
}
pub fn head_block_receiver(&self) -> watch::Receiver<ArcBlock> {
self.watch_consensus_head_receiver.clone()
}
pub async fn prometheus_metrics(&self) -> String {
let globals = HashMap::new();
// TODO: what globals? should this be the hostname or what?
@ -907,10 +920,10 @@ impl Web3ProxyApp {
self: &Arc<Self>,
authorization: Arc<Authorization>,
request: JsonRpcRequestEnum,
proxy_mode: ProxyMode,
) -> Result<(JsonRpcForwardedResponseEnum, Vec<Arc<Web3Connection>>), FrontendErrorResponse>
{
// TODO: this should probably be trace level
// // trace!(?request, "proxy_web3_rpc");
// trace!(?request, "proxy_web3_rpc");
// even though we have timeouts on the requests to our backend providers,
// we need a timeout for the incoming request so that retries don't run forever
@ -921,7 +934,7 @@ impl Web3ProxyApp {
JsonRpcRequestEnum::Single(request) => {
let (response, rpcs) = timeout(
max_time,
self.proxy_web3_rpc_request(&authorization, request),
self.proxy_cached_request(&authorization, request, proxy_mode),
)
.await??;
@ -930,7 +943,7 @@ impl Web3ProxyApp {
JsonRpcRequestEnum::Batch(requests) => {
let (responses, rpcs) = timeout(
max_time,
self.proxy_web3_rpc_requests(&authorization, requests),
self.proxy_web3_rpc_requests(&authorization, requests, proxy_mode),
)
.await??;
@ -947,6 +960,7 @@ impl Web3ProxyApp {
self: &Arc<Self>,
authorization: &Arc<Authorization>,
requests: Vec<JsonRpcRequest>,
proxy_mode: ProxyMode,
) -> anyhow::Result<(Vec<JsonRpcForwardedResponse>, Vec<Arc<Web3Connection>>)> {
// TODO: we should probably change ethers-rs to support this directly. they pushed this off to v2 though
let num_requests = requests.len();
@ -956,7 +970,7 @@ impl Web3ProxyApp {
let responses = join_all(
requests
.into_iter()
.map(|request| self.proxy_web3_rpc_request(authorization, request))
.map(|request| self.proxy_cached_request(authorization, request, proxy_mode))
.collect::<Vec<_>>(),
)
.await;
@ -1000,10 +1014,11 @@ impl Web3ProxyApp {
}
#[measure([ErrorCount, HitCount, ResponseTime, Throughput])]
async fn proxy_web3_rpc_request(
async fn proxy_cached_request(
self: &Arc<Self>,
authorization: &Arc<Authorization>,
mut request: JsonRpcRequest,
proxy_mode: ProxyMode,
) -> anyhow::Result<(JsonRpcForwardedResponse, Vec<Arc<Web3Connection>>)> {
// trace!("Received request: {:?}", request);
@ -1083,8 +1098,15 @@ impl Web3ProxyApp {
| "shh_uninstallFilter"
| "shh_version") => {
// TODO: client error stat
// TODO: proper error code
return Err(anyhow::anyhow!("method unsupported: {}", method));
// TODO: what error code?
return Ok((
JsonRpcForwardedResponse::from_string(
format!("method unsupported: {}", method),
None,
Some(request_id),
),
vec![],
));
}
// TODO: implement these commands
method @ ("eth_getFilterChanges"
@ -1094,7 +1116,15 @@ impl Web3ProxyApp {
| "eth_newPendingTransactionFilter"
| "eth_uninstallFilter") => {
// TODO: unsupported command stat
return Err(anyhow::anyhow!("not yet implemented: {}", method));
// TODO: what error code?
return Ok((
JsonRpcForwardedResponse::from_string(
format!("not yet implemented: {}", method),
None,
Some(request_id),
),
vec![],
));
}
// some commands can use local data or caches
"eth_accounts" => {
@ -1122,18 +1152,14 @@ impl Web3ProxyApp {
// TODO: eth_sendPrivateTransaction (https://docs.flashbots.net/flashbots-auction/searchers/advanced/rpc-endpoint#eth_sendprivatetransaction)
"eth_coinbase" => {
// no need for serving coinbase
// we could return a per-user payment address here, but then we might leak that to dapps
// no stats on this. its cheap
json!(Address::zero())
}
/*
// erigon was giving bad estimates. but now it doesn't need it
"eth_estimateGas" => {
// TODO: eth_estimateGas using anvil?
// TODO: modify the block requested?
let mut response = self
.balanced_rpcs
.try_send_best_upstream_server(
.try_proxy_connection(
proxy_mode,
authorization,
request,
Some(&request_metadata),
@ -1141,11 +1167,9 @@ impl Web3ProxyApp {
)
.await?;
let parsed_gas_estimate = if let Some(gas_estimate) = response.result.take() {
let parsed_gas_estimate: U256 = serde_json::from_str(gas_estimate.get())
.context("gas estimate result is not an U256")?;
parsed_gas_estimate
let mut gas_estimate: U256 = if let Some(gas_estimate) = response.result.take() {
serde_json::from_str(gas_estimate.get())
.context("gas estimate result is not an U256")?
} else {
// i think this is always an error response
let rpcs = request_metadata.backend_requests.lock().clone();
@ -1153,13 +1177,21 @@ impl Web3ProxyApp {
return Ok((response, rpcs));
};
// increase by 1.01%
let parsed_gas_estimate =
parsed_gas_estimate * U256::from(101_010) / U256::from(100_000);
let gas_increase =
if let Some(gas_increase_percent) = self.config.gas_increase_percent {
let gas_increase = gas_estimate * gas_increase_percent / U256::from(100);
json!(parsed_gas_estimate)
let min_gas_increase = self.config.gas_increase_min.unwrap_or_default();
gas_increase.max(min_gas_increase)
} else {
self.config.gas_increase_min.unwrap_or_default()
};
gas_estimate += gas_increase;
json!(gas_estimate)
}
*/
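The commented-out eth_estimateGas path now adds a configurable buffer instead of the previous fixed 1.01% bump: the estimate grows by the larger of `gas_increase_percent` of the estimate and `gas_increase_min`. A standalone sketch of that arithmetic (the helper and the example numbers are illustrative, not from this diff):
```rust
use ethers::types::U256;

// hypothetical helper mirroring the buffer rule in the diff above
fn buffered_gas_estimate(
    gas_estimate: U256,
    gas_increase_percent: Option<U256>,
    gas_increase_min: Option<U256>,
) -> U256 {
    let min_increase = gas_increase_min.unwrap_or_default();
    let increase = match gas_increase_percent {
        Some(percent) => (gas_estimate * percent / U256::from(100)).max(min_increase),
        None => min_increase,
    };
    gas_estimate + increase
}

fn main() {
    // e.g. the polygon default from the TODO list: the larger of 25k or 25%
    let buffered = buffered_gas_estimate(
        U256::from(50_000),
        Some(U256::from(25)),
        Some(U256::from(25_000)),
    );
    assert_eq!(buffered, U256::from(75_000)); // 25% of 50k is 12.5k, so the 25k floor wins
}
```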
// TODO: eth_gasPrice that does awesome magic to predict the future
"eth_hashrate" => {
// no stats on this. its cheap
@ -1172,22 +1204,32 @@ impl Web3ProxyApp {
// TODO: eth_sendBundle (flashbots command)
// broadcast transactions to all private rpcs at once
"eth_sendRawTransaction" => {
// TODO: how should we handle private_mode here?
let default_num = match proxy_mode {
// TODO: how many balanced rpcs should we send to? configurable? percentage of total?
ProxyMode::Best => Some(4),
ProxyMode::Fastest(0) => None,
// TODO: how many balanced rpcs should we send to? configurable? percentage of total?
// TODO: what if we do 2 per tier? we want to blast the third party rpcs
// TODO: maybe having the third party rpcs in their own Web3Connections would be good for this
ProxyMode::Fastest(x) => Some(x * 4),
ProxyMode::Versus => None,
};
let (private_rpcs, num) = if let Some(private_rpcs) = self.private_rpcs.as_ref() {
if authorization.checks.private_txs {
// if we are sending the transaction privately, no matter the proxy_mode, we send to ALL private rpcs
(private_rpcs, None)
} else {
// TODO: how many balanced rpcs should we send to? configurable? percentage of total?
// TODO: what if we do 2 per tier? we want to blast the third party rpcs
// TODO: maybe having the third party rpcs would be good for this
(&self.balanced_rpcs, Some(2))
(&self.balanced_rpcs, default_num)
}
} else {
(&self.balanced_rpcs, Some(2))
(&self.balanced_rpcs, default_num)
};
// try_send_all_upstream_servers puts the request id into the response. no need to do that ourselves here.
let mut response = private_rpcs
.try_send_all_upstream_servers(
.try_send_all_synced_connections(
authorization,
&request,
Some(request_metadata.clone()),
@ -1283,13 +1325,23 @@ impl Web3ProxyApp {
json!(false)
}
"eth_subscribe" => {
return Err(anyhow::anyhow!(
"notifications not supported. eth_subscribe is only available over a websocket"
return Ok((
JsonRpcForwardedResponse::from_str(
"notifications not supported. eth_subscribe is only available over a websocket",
Some(-32601),
Some(request_id),
),
vec![],
));
}
"eth_unsubscribe" => {
return Err(anyhow::anyhow!(
"notifications not supported. eth_unsubscribe is only available over a websocket"
return Ok((
JsonRpcForwardedResponse::from_str(
"notifications not supported. eth_unsubscribe is only available over a websocket",
Some(-32601),
Some(request_id),
),
vec![],
));
}
"net_listening" => {
@ -1298,7 +1350,8 @@ impl Web3ProxyApp {
json!(true)
}
"net_peerCount" => {
// emit stats
// no stats on this. its cheap
// TODO: do something with proxy_mode here?
self.balanced_rpcs.num_synced_rpcs().into()
}
"web3_clientVersion" => {
@ -1312,10 +1365,18 @@ impl Web3ProxyApp {
Some(serde_json::Value::Array(params)) => {
// TODO: make a struct and use serde conversion to clean this up
if params.len() != 1 || !params[0].is_string() {
// TODO: this needs the correct error code in the response
return Err(anyhow::anyhow!("invalid request"));
// TODO: what error code?
return Ok((
JsonRpcForwardedResponse::from_str(
"Invalid request",
Some(-32600),
Some(request_id),
),
vec![],
));
}
// TODO: don't return with ? here. send a jsonrpc invalid request
let param = Bytes::from_str(
params[0]
.as_str()
@ -1329,18 +1390,35 @@ impl Web3ProxyApp {
_ => {
// TODO: this needs the correct error code in the response
// TODO: emit stat?
return Err(anyhow::anyhow!("invalid request"));
return Ok((
JsonRpcForwardedResponse::from_str(
"invalid request",
None,
Some(request_id),
),
vec![],
));
}
}
}
"test" => {
return Ok((
JsonRpcForwardedResponse::from_str(
"The method test does not exist/is not available.",
Some(-32601),
Some(request_id),
),
vec![],
));
}
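Callers of the new `test` method get a normal JSON-RPC error back instead of a transport failure. The exact serialization of `JsonRpcForwardedResponse` is defined elsewhere in the repo, so the envelope below is a sketch assumed from the JSON-RPC 2.0 spec:
```rust
use serde_json::json;

fn main() {
    // assumed shape; the id is echoed from the incoming request_id
    let expected = json!({
        "jsonrpc": "2.0",
        "id": 1,
        "error": {
            "code": -32601,
            "message": "The method test does not exist/is not available."
        }
    });
    println!("{expected}");
}
```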
// anything else gets sent to backend rpcs and cached
method => {
// emit stats
// TODO: if no servers synced, wait for them to be synced?
let head_block = self
// TODO: if no servers synced, wait for them to be synced? probably better to error and let haproxy retry another server
let head_block_num = self
.balanced_rpcs
.head_block()
.head_block_num()
.context("no servers synced")?;
// we do this check before checking caches because it might modify the request params
@ -1350,7 +1428,7 @@ impl Web3ProxyApp {
authorization,
method,
request.params.as_mut(),
head_block.number(),
head_block_num,
&self.balanced_rpcs,
)
.await?
@ -1404,11 +1482,12 @@ impl Web3ProxyApp {
.try_get_with(cache_key, async move {
// TODO: retry some failures automatically!
// TODO: try private_rpcs if all the balanced_rpcs fail!
// TODO: put the hash here instead?
// TODO: put the hash here instead of the block number? its in the request already.
let mut response = self
.balanced_rpcs
.try_send_best_upstream_server(
self.allowed_lag,
.try_proxy_connection(
proxy_mode,
&authorization,
request,
Some(&request_metadata),
@ -1433,18 +1512,14 @@ impl Web3ProxyApp {
})?
} else {
self.balanced_rpcs
.try_send_best_upstream_server(
self.allowed_lag,
.try_proxy_connection(
proxy_mode,
&authorization,
request,
Some(&request_metadata),
None,
)
.await
.map_err(|err| {
// TODO: emit a stat for an error
anyhow::anyhow!("error while forwarding response: {}", err)
})?
.await?
}
};

@ -50,7 +50,7 @@ impl Web3ProxyApp {
match request_json.params.as_ref() {
Some(x) if x == &json!(["newHeads"]) => {
let authorization = authorization.clone();
let head_block_receiver = self.head_block_receiver.clone();
let head_block_receiver = self.watch_consensus_head_receiver.clone();
let stat_sender = self.stat_sender.clone();
trace!("newHeads subscription {:?}", subscription_id);

@ -36,7 +36,7 @@ impl ProxyResponseStat {
fn key(&self) -> ProxyResponseAggregateKey {
// include either the rpc_key_id or the origin
let (mut rpc_key_id, origin) = match (
self.authorization.checks.rpc_key_id,
self.authorization.checks.rpc_secret_key_id,
&self.authorization.origin,
) {
(Some(rpc_key_id), _) => {

@ -1,27 +1,39 @@
// TODO: websockets instead of http
// TODO: support websockets
use anyhow::Context;
use argh::FromArgs;
use chrono::Utc;
use ethers::types::U64;
use ethers::types::{Block, TxHash};
use log::info;
use log::warn;
use reqwest::Client;
use serde::Deserialize;
use serde_json::json;
use std::sync::atomic::{AtomicU32, Ordering};
use tokio::time::sleep;
use tokio::time::Duration;
#[derive(Debug, FromArgs)]
/// Command line interface for admins to interact with web3_proxy
pub struct CliConfig {
/// the RPC to check
/// the HTTP RPC to check
#[argh(option, default = "\"http://localhost:8545\".to_string()")]
pub check_url: String,
/// the RPC to compare to
#[argh(option, default = "\"https://eth.llamarpc.com\".to_string()")]
pub compare_url: String,
/// the HTTP RPC to compare against. defaults to LlamaNodes public RPC
#[argh(option)]
pub compare_url: Option<String>,
/// how many seconds to wait for sync.
/// Defaults to waiting forever.
/// if the wait is exceeded, will exit with code 2
#[argh(option)]
pub max_wait: Option<u64>,
/// require a specific chain id (for extra safety)
#[argh(option)]
pub chain_id: Option<u64>,
}
#[tokio::main]
@ -38,26 +50,73 @@ async fn main() -> anyhow::Result<()> {
let cli_config: CliConfig = argh::from_env();
let json_request = json!({
"id": "1",
"jsonrpc": "2.0",
"method": "eth_getBlockByNumber",
"params": [
"latest",
false,
],
});
let client = reqwest::Client::new();
// TODO: make sure the chain ids match
// TODO: automatic compare_url based on the chain id
let check_url = cli_config.check_url;
// make sure the chain ids match
let check_id = get_chain_id(&check_url, &client)
.await
.context("unknown chain id for check_url")?;
if let Some(chain_id) = cli_config.chain_id {
if chain_id != check_id {
return Err(anyhow::anyhow!(
"chain_id of check_url is wrong! Need {}. Found {}",
chain_id,
check_id,
));
}
}
let compare_url: String = match cli_config.compare_url {
Some(x) => x,
None => match check_id {
1 => "https://eth.llamarpc.com",
137 => "https://polygon.llamarpc.com",
_ => {
return Err(anyhow::anyhow!(
"--compare-url required for chain {}",
check_id
))
}
}
.to_string(),
};
info!(
"comparing {} to {} (chain {})",
check_url, compare_url, check_id
);
let compare_id = get_chain_id(&compare_url, &client)
.await
.context("unknown chain id for compare_url")?;
if check_id != compare_id {
return Err(anyhow::anyhow!(
"chain_id does not match! Need {}. Found {}",
check_id,
compare_id,
));
}
// start ids at 2 because id 1 was checking the chain id
let counter = AtomicU32::new(2);
let start = tokio::time::Instant::now();
loop {
match main_loop(&cli_config, &client, &json_request).await {
match main_loop(&check_url, &compare_url, &client, &counter).await {
Ok(()) => break,
Err(err) => {
warn!("{:?}", err);
if let Some(max_wait) = cli_config.max_wait {
if max_wait == 0 || start.elapsed().as_secs() > max_wait {
std::process::exit(2);
}
}
sleep(Duration::from_secs(10)).await;
}
}
@ -66,38 +125,77 @@ async fn main() -> anyhow::Result<()> {
Ok(())
}
#[derive(Deserialize)]
struct JsonRpcChainIdResult {
result: U64,
}
async fn get_chain_id(rpc: &str, client: &reqwest::Client) -> anyhow::Result<u64> {
let get_chain_id_request = json!({
"id": "1",
"jsonrpc": "2.0",
"method": "eth_chainId",
});
// TODO: loop until chain id is found?
let check_result = client
.post(rpc)
.json(&get_chain_id_request)
.send()
.await
.context("failed querying chain id")?
.json::<JsonRpcChainIdResult>()
.await
.context("failed parsing chain id")?
.result
.as_u64();
Ok(check_result)
}
#[derive(Deserialize)]
struct JsonRpcBlockResult {
result: Block<TxHash>,
}
async fn main_loop(
cli_config: &CliConfig,
check_url: &str,
compare_url: &str,
client: &Client,
json_request: &serde_json::Value,
counter: &AtomicU32,
) -> anyhow::Result<()> {
let check_result = client
.post(&cli_config.check_url)
.json(json_request)
// TODO: have a real id here that increments every call?
let get_block_number_request = json!({
"id": counter.fetch_add(1, Ordering::SeqCst),
"jsonrpc": "2.0",
"method": "eth_getBlockByNumber",
"params": [
"latest",
false,
],
});
let check_block = client
.post(check_url)
.json(&get_block_number_request)
.send()
.await
.context("querying check block")?
.json::<JsonRpcBlockResult>()
.await
.context("parsing check block")?;
.context("parsing check block")?
.result;
let compare_result = client
.post(&cli_config.compare_url)
.json(json_request)
let compare_block = client
.post(compare_url)
.json(&get_block_number_request)
.send()
.await
.context("querying compare block")?
.json::<JsonRpcBlockResult>()
.await
.context("parsing compare block")?;
let check_block = check_result.result;
let compare_block = compare_result.result;
.context("parsing compare block")?
.result;
let check_number = check_block.number.context("no check block number")?;
let compare_number = compare_block.number.context("no compare block number")?;

@ -1,399 +0,0 @@
//! Web3_proxy is a fast caching and load balancing proxy for web3 (Ethereum or similar) JsonRPC servers.
//!
//! Signed transactions (eth_sendRawTransaction) are sent in parallel to the configured private RPCs (eden, ethermine, flashbots, etc.).
//!
//! All other requests are sent to an RPC server on the latest block (alchemy, moralis, rivet, your own node, or one of many other providers).
//! If multiple servers are in sync, the fastest server is prioritized. Since the fastest server is most likely to serve requests, slow servers are unlikely to ever get any requests.
//#![warn(missing_docs)]
#![forbid(unsafe_code)]
use anyhow::Context;
use futures::StreamExt;
use log::{debug, error, info, warn};
use num::Zero;
use parking_lot::deadlock;
use std::fs;
use std::path::Path;
use std::sync::atomic::{self, AtomicUsize};
use std::thread;
use tokio::runtime;
use tokio::sync::broadcast;
use tokio::time::Duration;
use web3_proxy::app::{flatten_handle, flatten_handles, Web3ProxyApp};
use web3_proxy::config::{CliConfig, TopConfig};
use web3_proxy::{frontend, metrics_frontend};
fn run(
shutdown_sender: broadcast::Sender<()>,
cli_config: CliConfig,
top_config: TopConfig,
) -> anyhow::Result<()> {
debug!("{:?}", cli_config);
debug!("{:?}", top_config);
let mut shutdown_receiver = shutdown_sender.subscribe();
// spawn a thread for deadlock detection
// TODO: disable this feature during release mode and things should go faster
thread::spawn(move || loop {
thread::sleep(Duration::from_secs(10));
let deadlocks = deadlock::check_deadlock();
if deadlocks.is_empty() {
continue;
}
println!("{} deadlocks detected", deadlocks.len());
for (i, threads) in deadlocks.iter().enumerate() {
println!("Deadlock #{}", i);
for t in threads {
println!("Thread Id {:#?}", t.thread_id());
println!("{:#?}", t.backtrace());
}
}
});
// set up tokio's async runtime
let mut rt_builder = runtime::Builder::new_multi_thread();
let chain_id = top_config.app.chain_id;
rt_builder.enable_all().thread_name_fn(move || {
static ATOMIC_ID: AtomicUsize = AtomicUsize::new(0);
// TODO: what ordering? i think we want seqcst so that these all happen in order, but that might be stricter than we really need
let worker_id = ATOMIC_ID.fetch_add(1, atomic::Ordering::SeqCst);
// TODO: i think these max at 15 characters
format!("web3-{}-{}", chain_id, worker_id)
});
if cli_config.workers > 0 {
rt_builder.worker_threads(cli_config.workers);
}
// start tokio's async runtime
let rt = rt_builder.build()?;
let num_workers = rt.metrics().num_workers();
info!("num_workers: {}", num_workers);
rt.block_on(async {
let app_frontend_port = cli_config.port;
let app_prometheus_port = cli_config.prometheus_port;
// start the main app
let mut spawned_app =
Web3ProxyApp::spawn(top_config, num_workers, shutdown_sender.subscribe()).await?;
let frontend_handle =
tokio::spawn(frontend::serve(app_frontend_port, spawned_app.app.clone()));
let prometheus_handle = tokio::spawn(metrics_frontend::serve(
spawned_app.app,
app_prometheus_port,
));
// if everything is working, these should both run forever
tokio::select! {
x = flatten_handles(spawned_app.app_handles) => {
match x {
Ok(_) => info!("app_handle exited"),
Err(e) => {
return Err(e);
}
}
}
x = flatten_handle(frontend_handle) => {
match x {
Ok(_) => info!("frontend exited"),
Err(e) => {
return Err(e);
}
}
}
x = flatten_handle(prometheus_handle) => {
match x {
Ok(_) => info!("prometheus exited"),
Err(e) => {
return Err(e);
}
}
}
x = tokio::signal::ctrl_c() => {
match x {
Ok(_) => info!("quiting from ctrl-c"),
Err(e) => {
return Err(e.into());
}
}
}
x = shutdown_receiver.recv() => {
match x {
Ok(_) => info!("quiting from shutdown receiver"),
Err(e) => {
return Err(e.into());
}
}
}
};
// one of the handles stopped. send a value so the others know to shut down
if let Err(err) = shutdown_sender.send(()) {
warn!("shutdown sender err={:?}", err);
};
// wait for things like saving stats to the database to complete
info!("waiting on important background tasks");
let mut background_errors = 0;
while let Some(x) = spawned_app.background_handles.next().await {
match x {
Err(e) => {
error!("{:?}", e);
background_errors += 1;
}
Ok(Err(e)) => {
error!("{:?}", e);
background_errors += 1;
}
Ok(Ok(_)) => continue,
}
}
if background_errors.is_zero() {
info!("finished");
} else {
// TODO: collect instead?
error!("finished with errors!")
}
Ok(())
})
}
fn main() -> anyhow::Result<()> {
// if RUST_LOG isn't set, configure a default
let rust_log = match std::env::var("RUST_LOG") {
Ok(x) => x,
Err(_) => "info,ethers=debug,redis_rate_limit=debug,web3_proxy=debug".to_string(),
};
// this probably won't matter for us in docker, but better safe than sorry
fdlimit::raise_fd_limit();
// initial configuration from flags
let cli_config: CliConfig = argh::from_env();
// convert to absolute path so error logging is most helpful
let config_path = Path::new(&cli_config.config)
.canonicalize()
.context(format!(
"checking full path of {} and {}",
".", // TODO: get cwd somehow
cli_config.config
))?;
// advanced configuration is on disk
let top_config: String = fs::read_to_string(config_path.clone())
.context(format!("reading config at {}", config_path.display()))?;
let top_config: TopConfig = toml::from_str(&top_config)
.context(format!("parsing config at {}", config_path.display()))?;
// TODO: this doesn't seem to do anything
proctitle::set_title(format!("web3_proxy-{}", top_config.app.chain_id));
let logger = env_logger::builder().parse_filters(&rust_log).build();
let max_level = logger.filter();
// connect to sentry for error reporting
// if no sentry, only log to stdout
let _sentry_guard = if let Some(sentry_url) = top_config.app.sentry_url.clone() {
let logger = sentry::integrations::log::SentryLogger::with_dest(logger);
log::set_boxed_logger(Box::new(logger)).unwrap();
let guard = sentry::init((
sentry_url,
sentry::ClientOptions {
release: sentry::release_name!(),
// TODO: Set this a to lower value (from config) in production
traces_sample_rate: 1.0,
..Default::default()
},
));
Some(guard)
} else {
log::set_boxed_logger(Box::new(logger)).unwrap();
None
};
log::set_max_level(max_level);
// we used to do this earlier, but now we attach sentry
debug!("CLI config @ {:#?}", cli_config.config);
// tokio has code for catching ctrl+c so we use that
// this shutdown sender is currently only used in tests, but we might make a /shutdown endpoint or something
// we do not need this receiver. new receivers are made by `shutdown_sender.subscribe()`
let (shutdown_sender, _) = broadcast::channel(1);
run(shutdown_sender, cli_config, top_config)
}
#[cfg(test)]
mod tests {
use ethers::{
prelude::{Http, Provider, U256},
utils::Anvil,
};
use hashbrown::HashMap;
use std::env;
use web3_proxy::{
config::{AppConfig, Web3ConnectionConfig},
rpcs::blockchain::ArcBlock,
};
use super::*;
#[tokio::test]
async fn it_works() {
// TODO: move basic setup into a test fixture
let path = env::var("PATH").unwrap();
println!("path: {}", path);
// TODO: how should we handle logs in this?
// TODO: option for super verbose logs
std::env::set_var("RUST_LOG", "info,web3_proxy=debug");
let _ = env_logger::builder().is_test(true).try_init();
let anvil = Anvil::new().spawn();
println!("Anvil running at `{}`", anvil.endpoint());
let anvil_provider = Provider::<Http>::try_from(anvil.endpoint()).unwrap();
// mine a block because my code doesn't like being on block 0
// TODO: make block 0 okay? is it okay now?
let _: U256 = anvil_provider
.request("evm_mine", None::<()>)
.await
.unwrap();
// make a test CliConfig
let cli_config = CliConfig {
port: 0,
prometheus_port: 0,
workers: 4,
config: "./does/not/exist/test.toml".to_string(),
cookie_key_filename: "./does/not/exist/development_cookie_key".to_string(),
};
// make a test TopConfig
// TODO: load TopConfig from a file? CliConfig could have `cli_config.load_top_config`. would need to inject our endpoint ports
let top_config = TopConfig {
app: AppConfig {
chain_id: 31337,
default_user_max_requests_per_period: Some(6_000_000),
min_sum_soft_limit: 1,
min_synced_rpcs: 1,
public_requests_per_period: Some(1_000_000),
response_cache_max_bytes: 10_usize.pow(7),
redirect_public_url: Some("example.com/".to_string()),
redirect_rpc_key_url: Some("example.com/{{rpc_key_id}}".to_string()),
..Default::default()
},
balanced_rpcs: HashMap::from([
(
"anvil".to_string(),
Web3ConnectionConfig {
disabled: false,
display_name: None,
url: anvil.endpoint(),
block_data_limit: None,
soft_limit: 100,
hard_limit: None,
tier: 0,
subscribe_txs: Some(false),
extra: Default::default(),
},
),
(
"anvil_ws".to_string(),
Web3ConnectionConfig {
disabled: false,
display_name: None,
url: anvil.ws_endpoint(),
block_data_limit: None,
soft_limit: 100,
hard_limit: None,
tier: 0,
subscribe_txs: Some(false),
extra: Default::default(),
},
),
]),
private_rpcs: None,
extra: Default::default(),
};
let (shutdown_sender, _) = broadcast::channel(1);
// spawn another thread for running the app
// TODO: allow launching into the local tokio runtime instead of creating a new one?
let handle = {
let shutdown_sender = shutdown_sender.clone();
thread::spawn(move || run(shutdown_sender, cli_config, top_config))
};
// TODO: do something to the node. query latest block, mine another block, query again
let proxy_provider = Provider::<Http>::try_from(anvil.endpoint()).unwrap();
let anvil_result = anvil_provider
.request::<_, Option<ArcBlock>>("eth_getBlockByNumber", ("latest", true))
.await
.unwrap()
.unwrap();
let proxy_result = proxy_provider
.request::<_, Option<ArcBlock>>("eth_getBlockByNumber", ("latest", true))
.await
.unwrap()
.unwrap();
assert_eq!(anvil_result, proxy_result);
let first_block_num = anvil_result.number.unwrap();
let _: U256 = anvil_provider
.request("evm_mine", None::<()>)
.await
.unwrap();
let anvil_result = anvil_provider
.request::<_, Option<ArcBlock>>("eth_getBlockByNumber", ("latest", true))
.await
.unwrap()
.unwrap();
let proxy_result = proxy_provider
.request::<_, Option<ArcBlock>>("eth_getBlockByNumber", ("latest", true))
.await
.unwrap()
.unwrap();
assert_eq!(anvil_result, proxy_result);
let second_block_num = anvil_result.number.unwrap();
assert_eq!(first_block_num, second_block_num - 1);
// tell the test app to shut down
shutdown_sender.send(()).unwrap();
println!("waiting for shutdown...");
// TODO: panic if a timeout is reached
handle.join().unwrap().unwrap();
}
}

@ -0,0 +1,77 @@
use anyhow::Context;
use argh::FromArgs;
use entities::{rpc_key, user};
use ethers::prelude::Address;
use log::info;
use migration::sea_orm::{self, ActiveModelTrait, ColumnTrait, EntityTrait, QueryFilter};
use ulid::Ulid;
use uuid::Uuid;
use web3_proxy::frontend::authorization::RpcSecretKey;
#[derive(FromArgs, PartialEq, Debug, Eq)]
/// Create a new user and api key
#[argh(subcommand, name = "create_key")]
pub struct CreateKeySubCommand {
/// the user's ethereum address or descriptive string.
/// If a string is given, it will be converted to hex and potentially truncated.
/// Users from strings are only for testing since they won't be able to log in.
#[argh(positional)]
address: String,
/// the user's api ULID or UUID key.
/// If none given, one will be created.
#[argh(option)]
rpc_secret_key: Option<RpcSecretKey>,
/// an optional short description of the key's purpose.
#[argh(option)]
description: Option<String>,
}
impl CreateKeySubCommand {
pub async fn main(self, db: &sea_orm::DatabaseConnection) -> anyhow::Result<()> {
// TODO: would be nice to use the fixed array instead of a Vec in the entities
// take a simple String. If it starts with 0x, parse as address. otherwise convert ascii to hex
let address: Vec<u8> = if self.address.starts_with("0x") {
let address = self.address.parse::<Address>()?;
address.to_fixed_bytes().into()
} else {
// TODO: allow ENS
// left pad and truncate the string
let address = &format!("{:\x00>20}", self.address)[0..20];
// convert the string to bytes
let bytes = address.as_bytes();
// convert the slice to a Vec
bytes.try_into().expect("Bytes can always be a Vec<u8>")
};
// TODO: get existing or create a new one
let u = user::Entity::find()
.filter(user::Column::Address.eq(address))
.one(db)
.await?
.context("No user found with that address")?;
info!("user #{}", u.id);
let rpc_secret_key = self.rpc_secret_key.unwrap_or_else(RpcSecretKey::new);
// create a key for the new user
let uk = rpc_key::ActiveModel {
user_id: sea_orm::Set(u.id),
secret_key: sea_orm::Set(rpc_secret_key.into()),
description: sea_orm::Set(self.description),
..Default::default()
};
let _uk = uk.save(db).await.context("Failed saving new user key")?;
info!("user key as ULID: {}", Ulid::from(rpc_secret_key));
info!("user key as UUID: {}", Uuid::from(rpc_secret_key));
Ok(())
}
}

@ -0,0 +1,309 @@
#![forbid(unsafe_code)]
use argh::FromArgs;
use futures::StreamExt;
use log::{error, info, warn};
use num::Zero;
use tokio::sync::broadcast;
use web3_proxy::app::{flatten_handle, flatten_handles, Web3ProxyApp};
use web3_proxy::config::TopConfig;
use web3_proxy::{frontend, metrics_frontend};
/// start the main proxy daemon
#[derive(FromArgs, PartialEq, Debug, Eq)]
#[argh(subcommand, name = "proxyd")]
pub struct ProxydSubCommand {
/// path to a toml of rpc servers
/// what port the proxy should listen on
#[argh(option, default = "8544")]
pub port: u16,
/// what port the proxy should expose prometheus stats on
#[argh(option, default = "8543")]
pub prometheus_port: u16,
}
impl ProxydSubCommand {
pub async fn main(self, top_config: TopConfig, num_workers: usize) -> anyhow::Result<()> {
let (shutdown_sender, _) = broadcast::channel(1);
run(
top_config,
self.port,
self.prometheus_port,
num_workers,
shutdown_sender,
)
.await
}
}
async fn run(
top_config: TopConfig,
frontend_port: u16,
prometheus_port: u16,
num_workers: usize,
shutdown_sender: broadcast::Sender<()>,
) -> anyhow::Result<()> {
// tokio has code for catching ctrl+c so we use that
// this shutdown sender is currently only used in tests, but we might make a /shutdown endpoint or something
// we do not need this receiver. new receivers are made by `shutdown_sender.subscribe()`
let app_frontend_port = frontend_port;
let app_prometheus_port = prometheus_port;
let mut shutdown_receiver = shutdown_sender.subscribe();
// start the main app
let mut spawned_app =
Web3ProxyApp::spawn(top_config, num_workers, shutdown_sender.subscribe()).await?;
// start the prometheus metrics port
let prometheus_handle = tokio::spawn(metrics_frontend::serve(
spawned_app.app.clone(),
app_prometheus_port,
));
// wait until the app has seen its first consensus head block
// TODO: if backups were included, wait a little longer
let _ = spawned_app.app.head_block_receiver().changed().await;
// start the frontend port
let frontend_handle = tokio::spawn(frontend::serve(app_frontend_port, spawned_app.app.clone()));
// if everything is working, these should all run forever
tokio::select! {
x = flatten_handles(spawned_app.app_handles) => {
match x {
Ok(_) => info!("app_handle exited"),
Err(e) => {
return Err(e);
}
}
}
x = flatten_handle(frontend_handle) => {
match x {
Ok(_) => info!("frontend exited"),
Err(e) => {
return Err(e);
}
}
}
x = flatten_handle(prometheus_handle) => {
match x {
Ok(_) => info!("prometheus exited"),
Err(e) => {
return Err(e);
}
}
}
x = tokio::signal::ctrl_c() => {
match x {
Ok(_) => info!("quiting from ctrl-c"),
Err(e) => {
return Err(e.into());
}
}
}
x = shutdown_receiver.recv() => {
match x {
Ok(_) => info!("quiting from shutdown receiver"),
Err(e) => {
return Err(e.into());
}
}
}
};
// one of the handles stopped. send a value so the others know to shut down
if let Err(err) = shutdown_sender.send(()) {
warn!("shutdown sender err={:?}", err);
};
// wait for things like saving stats to the database to complete
info!("waiting on important background tasks");
let mut background_errors = 0;
while let Some(x) = spawned_app.background_handles.next().await {
match x {
Err(e) => {
error!("{:?}", e);
background_errors += 1;
}
Ok(Err(e)) => {
error!("{:?}", e);
background_errors += 1;
}
Ok(Ok(_)) => continue,
}
}
if background_errors.is_zero() {
info!("finished");
Ok(())
} else {
// TODO: collect instead?
Err(anyhow::anyhow!("finished with errors!"))
}
}
#[cfg(test)]
mod tests {
use ethers::{
prelude::{Http, Provider, U256},
utils::Anvil,
};
use hashbrown::HashMap;
use std::env;
use web3_proxy::{
config::{AppConfig, Web3ConnectionConfig},
rpcs::blockchain::ArcBlock,
};
use super::*;
#[tokio::test]
async fn it_works() {
// TODO: move basic setup into a test fixture
let path = env::var("PATH").unwrap();
println!("path: {}", path);
// TODO: how should we handle logs in this?
// TODO: option for super verbose logs
std::env::set_var("RUST_LOG", "info,web3_proxy=debug");
let _ = env_logger::builder().is_test(true).try_init();
let anvil = Anvil::new().spawn();
println!("Anvil running at `{}`", anvil.endpoint());
let anvil_provider = Provider::<Http>::try_from(anvil.endpoint()).unwrap();
// mine a block because my code doesn't like being on block 0
// TODO: make block 0 okay? is it okay now?
let _: U256 = anvil_provider
.request("evm_mine", None::<()>)
.await
.unwrap();
// make a test TopConfig
// TODO: load TopConfig from a file? CliConfig could have `cli_config.load_top_config`. would need to inject our endpoint ports
let top_config = TopConfig {
app: AppConfig {
chain_id: 31337,
default_user_max_requests_per_period: Some(6_000_000),
min_sum_soft_limit: 1,
min_synced_rpcs: 1,
public_requests_per_period: Some(1_000_000),
response_cache_max_bytes: 10_usize.pow(7),
redirect_public_url: Some("example.com/".to_string()),
redirect_rpc_key_url: Some("example.com/{{rpc_key_id}}".to_string()),
..Default::default()
},
balanced_rpcs: HashMap::from([
(
"anvil".to_string(),
Web3ConnectionConfig {
disabled: false,
display_name: None,
url: anvil.endpoint(),
backup: Some(false),
block_data_limit: None,
soft_limit: 100,
hard_limit: None,
tier: 0,
subscribe_txs: Some(false),
extra: Default::default(),
},
),
(
"anvil_ws".to_string(),
Web3ConnectionConfig {
disabled: false,
display_name: None,
url: anvil.ws_endpoint(),
backup: Some(false),
block_data_limit: None,
soft_limit: 100,
hard_limit: None,
tier: 0,
subscribe_txs: Some(false),
extra: Default::default(),
},
),
]),
private_rpcs: None,
extra: Default::default(),
};
let (shutdown_sender, _) = broadcast::channel(1);
// spawn another thread for running the app
// TODO: allow launching into the local tokio runtime instead of creating a new one?
let handle = {
let shutdown_sender = shutdown_sender.clone();
let frontend_port = 0;
let prometheus_port = 0;
tokio::spawn(async move {
run(
top_config,
frontend_port,
prometheus_port,
2,
shutdown_sender,
)
.await
})
};
// TODO: do something to the node. query latest block, mine another block, query again
let proxy_provider = Provider::<Http>::try_from(anvil.endpoint()).unwrap();
let anvil_result = anvil_provider
.request::<_, Option<ArcBlock>>("eth_getBlockByNumber", ("latest", true))
.await
.unwrap()
.unwrap();
let proxy_result = proxy_provider
.request::<_, Option<ArcBlock>>("eth_getBlockByNumber", ("latest", true))
.await
.unwrap()
.unwrap();
assert_eq!(anvil_result, proxy_result);
let first_block_num = anvil_result.number.unwrap();
let _: U256 = anvil_provider
.request("evm_mine", None::<()>)
.await
.unwrap();
let anvil_result = anvil_provider
.request::<_, Option<ArcBlock>>("eth_getBlockByNumber", ("latest", true))
.await
.unwrap()
.unwrap();
let proxy_result = proxy_provider
.request::<_, Option<ArcBlock>>("eth_getBlockByNumber", ("latest", true))
.await
.unwrap()
.unwrap();
assert_eq!(anvil_result, proxy_result);
let second_block_num = anvil_result.number.unwrap();
assert_eq!(first_block_num, second_block_num - 1);
// tell the test app to shut down
shutdown_sender.send(()).unwrap();
println!("waiting for shutdown...");
// TODO: panic if a timeout is reached
handle.await.unwrap().unwrap();
}
}

@ -1,15 +1,24 @@
use argh::FromArgs;
use migration::sea_orm::DatabaseConnection;
use web3_proxy::app::drop_migration_lock;
use web3_proxy::app::{drop_migration_lock, migrate_db};
#[derive(FromArgs, PartialEq, Debug, Eq)]
/// In case of emergency, break glass.
#[argh(subcommand, name = "drop_migration_lock")]
pub struct DropMigrationLockSubCommand {}
pub struct DropMigrationLockSubCommand {
#[argh(option)]
/// run migrations after dropping the lock
and_migrate: bool,
}
impl DropMigrationLockSubCommand {
pub async fn main(&self, db_conn: &DatabaseConnection) -> anyhow::Result<()> {
drop_migration_lock(db_conn).await?;
if self.and_migrate {
migrate_db(db_conn, true).await?;
} else {
// just drop the lock
drop_migration_lock(db_conn).await?;
}
Ok(())
}
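
A hypothetical invocation of the new flag, assuming argh parses the bool option as an explicit value and `$DB_URL` stands in for a real connection string:

```
web3_proxy_cli --db-url "$DB_URL" drop_migration_lock --and-migrate true
```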

@ -1,137 +0,0 @@
use argh::FromArgs;
use ethers::types::{Block, TxHash, H256};
use log::{error, info, warn};
use serde::{Deserialize, Serialize};
use serde_json::json;
use web3_proxy::jsonrpc::JsonRpcErrorData;
#[derive(FromArgs, PartialEq, Debug, Eq)]
/// Never bring only 2 compasses to sea.
#[argh(subcommand, name = "health_compass")]
pub struct HealthCompassSubCommand {
#[argh(positional)]
/// first rpc
rpc_a: String,
#[argh(positional)]
/// second rpc
rpc_b: String,
#[argh(positional)]
/// third rpc
rpc_c: String,
}
#[derive(Debug, Deserialize, Serialize)]
struct JsonRpcResponse<V> {
// pub jsonrpc: String,
// pub id: Box<RawValue>,
#[serde(skip_serializing_if = "Option::is_none")]
pub result: Option<V>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<JsonRpcErrorData>,
}
impl HealthCompassSubCommand {
pub async fn main(self) -> anyhow::Result<()> {
let client = reqwest::Client::new();
let block_by_number_request = json!({
"jsonrpc": "2.0",
"id": "1",
"method": "eth_getBlockByNumber",
"params": ["latest", false],
});
let a = client
.post(&self.rpc_a)
.json(&block_by_number_request)
.send()
.await?
.json::<JsonRpcResponse<Block<TxHash>>>()
.await?
.result
.unwrap();
// check the parent because b and c might not be as fast as a
let parent_hash = a.parent_hash;
let a = check_rpc(&parent_hash, &client, &self.rpc_a).await;
let b = check_rpc(&parent_hash, &client, &self.rpc_b).await;
let c = check_rpc(&parent_hash, &client, &self.rpc_c).await;
match (a, b, c) {
(Ok(Ok(a)), Ok(Ok(b)), Ok(Ok(c))) => {
if a != b {
error!("A: {:?}\n\nB: {:?}\n\nC: {:?}", a, b, c);
return Err(anyhow::anyhow!("difference detected!"));
}
if b != c {
error!("\nA: {:?}\n\nB: {:?}\n\nC: {:?}", a, b, c);
return Err(anyhow::anyhow!("difference detected!"));
}
// all three rpcs agree
}
(Ok(Ok(a)), Ok(Ok(b)), c) => {
// not all successes! but still enough to compare
warn!("C failed: {:?}", c);
if a != b {
error!("\nA: {:?}\n\nB: {:?}", a, b);
return Err(anyhow::anyhow!("difference detected!"));
}
}
(Ok(Ok(a)), b, Ok(Ok(c))) => {
// not all successes! but still enough to compare
warn!("B failed: {:?}", b);
if a != c {
error!("\nA: {:?}\n\nC: {:?}", a, c);
return Err(anyhow::anyhow!("difference detected!"));
}
}
(a, b, c) => {
// not enough successes
error!("A: {:?}\n\nB: {:?}\n\nC: {:?}", a, b, c);
return Err(anyhow::anyhow!("All are failing!"));
}
}
info!("OK");
Ok(())
}
}
// i don't think we need a whole provider. a simple http request is easiest
async fn check_rpc(
block_hash: &H256,
client: &reqwest::Client,
rpc: &str,
) -> anyhow::Result<Result<Block<TxHash>, JsonRpcErrorData>> {
let block_by_hash_request = json!({
"jsonrpc": "2.0",
"id": "1",
"method": "eth_getBlockByHash",
"params": [block_hash, false],
});
// TODO: don't unwrap! don't use the try operator
let response: JsonRpcResponse<Block<TxHash>> = client
.post(rpc)
.json(&block_by_hash_request)
.send()
.await?
.json()
.await?;
if let Some(result) = response.result {
Ok(Ok(result))
} else if let Some(result) = response.error {
Ok(Err(result))
} else {
unimplemented!("{:?}", response)
}
}

@ -5,35 +5,61 @@ mod change_user_tier_by_address;
mod change_user_tier_by_key;
mod check_config;
mod count_users;
mod create_key;
mod create_user;
mod daemon;
mod drop_migration_lock;
mod health_compass;
mod list_user_tier;
mod pagerduty;
mod rpc_accounting;
mod sentryd;
mod transfer_key;
mod user_export;
mod user_import;
use anyhow::Context;
use argh::FromArgs;
use std::fs;
use ethers::types::U256;
use log::{info, warn};
use pagerduty_rs::eventsv2async::EventsV2 as PagerdutyAsyncEventsV2;
use pagerduty_rs::eventsv2sync::EventsV2 as PagerdutySyncEventsV2;
use std::{
fs, panic,
path::Path,
sync::atomic::{self, AtomicUsize},
};
use tokio::runtime;
use web3_proxy::pagerduty::panic_handler;
use web3_proxy::{
app::{get_db, get_migrated_db},
app::{get_db, get_migrated_db, APP_USER_AGENT},
config::TopConfig,
};
#[cfg(feature = "deadlock")]
use parking_lot::deadlock;
#[cfg(feature = "deadlock")]
use std::thread;
#[cfg(feature = "deadlock")]
use tokio::time::Duration;
#[derive(Debug, FromArgs)]
/// Command line interface for admins to interact with web3_proxy
pub struct CliConfig {
/// path to the application config (optional).
pub struct Web3ProxyCli {
/// path to the application config (only required for some commands; defaults to dev config).
#[argh(option)]
pub config: Option<String>,
/// if no config, what database the client should connect to. Defaults to dev db.
#[argh(
option,
default = "\"mysql://root:dev_web3_proxy@127.0.0.1:13306/dev_web3_proxy\".to_string()"
)]
pub db_url: String,
/// number of worker threads. Defaults to the number of logical processors
#[argh(option, default = "0")]
pub workers: usize,
/// if no config, what database the client should connect to (only required for some commands; defaults to the dev db)
#[argh(option)]
pub db_url: Option<String>,
/// if no config, what sentry url the client should connect to
#[argh(option)]
pub sentry_url: Option<String>,
/// this one cli can do multiple things
#[argh(subcommand)]
@ -50,10 +76,13 @@ enum SubCommand {
ChangeUserTierByKey(change_user_tier_by_key::ChangeUserTierByKeySubCommand),
CheckConfig(check_config::CheckConfigSubCommand),
CountUsers(count_users::CountUsersSubCommand),
CreateKey(create_key::CreateKeySubCommand),
CreateUser(create_user::CreateUserSubCommand),
DropMigrationLock(drop_migration_lock::DropMigrationLockSubCommand),
HealthCompass(health_compass::HealthCompassSubCommand),
Pagerduty(pagerduty::PagerdutySubCommand),
Proxyd(daemon::ProxydSubCommand),
RpcAccounting(rpc_accounting::RpcAccountingSubCommand),
Sentryd(sentryd::SentrydSubCommand),
TransferKey(transfer_key::TransferKeySubCommand),
UserExport(user_export::UserExportSubCommand),
UserImport(user_import::UserImportSubCommand),
@ -62,28 +91,97 @@ enum SubCommand {
// TODO: sub command to change a user's tier
}
#[tokio::main]
async fn main() -> anyhow::Result<()> {
// if RUST_LOG isn't set, configure a default
// TODO: is there a better way to do this?
if std::env::var("RUST_LOG").is_err() {
// std::env::set_var("RUST_LOG", "info,web3_proxy=debug,web3_proxy_cli=debug");
std::env::set_var("RUST_LOG", "info,web3_proxy=debug,web3_proxy_cli=debug");
fn main() -> anyhow::Result<()> {
#[cfg(feature = "deadlock")]
{
// spawn a thread for deadlock detection
thread::spawn(move || loop {
thread::sleep(Duration::from_secs(10));
let deadlocks = deadlock::check_deadlock();
if deadlocks.is_empty() {
continue;
}
println!("{} deadlocks detected", deadlocks.len());
for (i, threads) in deadlocks.iter().enumerate() {
println!("Deadlock #{}", i);
for t in threads {
println!("Thread Id {:#?}", t.thread_id());
println!("{:#?}", t.backtrace());
}
}
});
}
env_logger::init();
// if RUST_LOG isn't set, configure a default
// TODO: is there a better way to do this?
let rust_log = match std::env::var("RUST_LOG") {
Ok(x) => x,
Err(_) => match std::env::var("WEB3_PROXY_TRACE").map(|x| x == "true") {
Ok(true) => {
vec![
"info",
"ethers=debug",
"redis_rate_limit=debug",
"web3_proxy=trace",
"web3_proxy_cli=trace",
"web3_proxy::rpcs::blockchain=info",
"web3_proxy::rpcs::request=debug",
]
}
_ => {
vec![
"info",
"ethers=debug",
"redis_rate_limit=debug",
"web3_proxy=debug",
"web3_proxy_cli=debug",
]
}
}
.join(","),
};
// this probably won't matter for us in docker, but better safe than sorry
fdlimit::raise_fd_limit();
let mut cli_config: CliConfig = argh::from_env();
let mut cli_config: Web3ProxyCli = argh::from_env();
if cli_config.config.is_none() && cli_config.db_url.is_none() && cli_config.sentry_url.is_none()
{
// TODO: default to example.toml if development.toml doesn't exist
info!("defaulting to development config");
cli_config.config = Some("./config/development.toml".to_string());
}
let top_config = if let Some(top_config_path) = cli_config.config.clone() {
let top_config_path = Path::new(&top_config_path)
.canonicalize()
.context(format!("checking for config at {}", top_config_path))?;
let top_config = if let Some(top_config_path) = cli_config.config.clone() {
let top_config: String = fs::read_to_string(top_config_path)?;
let top_config: TopConfig = toml::from_str(&top_config)?;
let mut top_config: TopConfig = toml::from_str(&top_config)?;
if let Some(top_config_db_url) = top_config.app.db_url.clone() {
cli_config.db_url = top_config_db_url;
// TODO: this doesn't seem to do anything
proctitle::set_title(format!("web3_proxy-{}", top_config.app.chain_id));
if cli_config.db_url.is_none() {
cli_config.db_url = top_config.app.db_url.clone();
}
if let Some(sentry_url) = top_config.app.sentry_url.clone() {
cli_config.sentry_url = Some(sentry_url);
}
if top_config.app.chain_id == 137 {
// TODO: these numbers are arbitrary. i think the maticnetwork/erigon fork has a bug
if top_config.app.gas_increase_min.is_none() {
top_config.app.gas_increase_min = Some(U256::from(40_000));
}
if top_config.app.gas_increase_percent.is_none() {
top_config.app.gas_increase_percent = Some(U256::from(40));
}
}
Some(top_config)
@ -91,69 +189,231 @@ async fn main() -> anyhow::Result<()> {
None
};
match cli_config.sub_command {
SubCommand::ChangeUserAddress(x) => {
let db_conn = get_db(cli_config.db_url, 1, 1).await?;
let logger = env_logger::builder().parse_filters(&rust_log).build();
x.main(&db_conn).await
}
SubCommand::ChangeUserTier(x) => {
let db_conn = get_db(cli_config.db_url, 1, 1).await?;
let max_level = logger.filter();
x.main(&db_conn).await
}
SubCommand::ChangeUserAdminStatus(x) => {
let db_conn = get_db(cli_config.db_url, 1, 1).await?;
// connect to sentry for error reporting
// if no sentry, only log to stdout
let _sentry_guard = if let Some(sentry_url) = cli_config.sentry_url.clone() {
let logger = sentry::integrations::log::SentryLogger::with_dest(logger);
x.main(&db_conn).await
}
SubCommand::ChangeUserTierByAddress(x) => {
let db_conn = get_db(cli_config.db_url, 1, 1).await?;
log::set_boxed_logger(Box::new(logger)).unwrap();
x.main(&db_conn).await
}
SubCommand::ChangeUserTierByKey(x) => {
let db_conn = get_db(cli_config.db_url, 1, 1).await?;
let guard = sentry::init((
sentry_url,
sentry::ClientOptions {
release: sentry::release_name!(),
// TODO: set this to a lower value (from config) in production
traces_sample_rate: 1.0,
..Default::default()
},
));
x.main(&db_conn).await
}
SubCommand::CheckConfig(x) => x.main().await,
SubCommand::CreateUser(x) => {
let db_conn = get_migrated_db(cli_config.db_url, 1, 1).await?;
Some(guard)
} else {
log::set_boxed_logger(Box::new(logger)).unwrap();
x.main(&db_conn).await
}
SubCommand::CountUsers(x) => {
let db_conn = get_db(cli_config.db_url, 1, 1).await?;
None
};
x.main(&db_conn).await
}
SubCommand::DropMigrationLock(x) => {
// very intentionally, do NOT run migrations here
let db_conn = get_db(cli_config.db_url, 1, 1).await?;
log::set_max_level(max_level);
x.main(&db_conn).await
}
SubCommand::HealthCompass(x) => x.main().await,
SubCommand::RpcAccounting(x) => {
let db_conn = get_migrated_db(cli_config.db_url, 1, 1).await?;
info!("{}", APP_USER_AGENT);
x.main(&db_conn).await
}
SubCommand::TransferKey(x) => {
let db_conn = get_db(cli_config.db_url, 1, 1).await?;
// optionally connect to pagerduty
// TODO: fix this nested result
let (pagerduty_async, pagerduty_sync) = if let Ok(pagerduty_key) =
std::env::var("PAGERDUTY_INTEGRATION_KEY")
{
let pagerduty_async =
PagerdutyAsyncEventsV2::new(pagerduty_key.clone(), Some(APP_USER_AGENT.to_string()))?;
let pagerduty_sync =
PagerdutySyncEventsV2::new(pagerduty_key, Some(APP_USER_AGENT.to_string()))?;
x.main(&db_conn).await
}
SubCommand::UserExport(x) => {
let db_conn = get_migrated_db(cli_config.db_url, 1, 1).await?;
(Some(pagerduty_async), Some(pagerduty_sync))
} else {
info!("No PAGERDUTY_INTEGRATION_KEY");
x.main(&db_conn).await
}
SubCommand::UserImport(x) => {
let db_conn = get_migrated_db(cli_config.db_url, 1, 1).await?;
(None, None)
};
x.main(&db_conn).await
}
// panic handler that sends to pagerduty.
// TODO: use the sentry handler if no pager duty. use default if no sentry
if let Some(pagerduty_sync) = pagerduty_sync {
let top_config = top_config.clone();
panic::set_hook(Box::new(move |x| {
panic_handler(top_config.clone(), &pagerduty_sync, x);
}));
}
// set up tokio's async runtime
let mut rt_builder = runtime::Builder::new_multi_thread();
rt_builder.enable_all();
if cli_config.workers > 0 {
rt_builder.worker_threads(cli_config.workers);
}
if let Some(top_config) = top_config.as_ref() {
let chain_id = top_config.app.chain_id;
rt_builder.thread_name_fn(move || {
static ATOMIC_ID: AtomicUsize = AtomicUsize::new(0);
// TODO: what ordering? i think we want seqcst so that these all happen in order, but that might be stricter than we really need
let worker_id = ATOMIC_ID.fetch_add(1, atomic::Ordering::SeqCst);
// TODO: i think these max at 15 characters
format!("web3-{}-{}", chain_id, worker_id)
});
}
// start tokio's async runtime
let rt = rt_builder.build()?;
let num_workers = rt.metrics().num_workers();
info!("num_workers: {}", num_workers);
rt.block_on(async {
match cli_config.sub_command {
SubCommand::ChangeUserAddress(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::ChangeUserTier(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::ChangeUserAdminStatus(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::ChangeUserTierByAddress(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::ChangeUserTierByKey(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::CheckConfig(x) => x.main().await,
SubCommand::CreateKey(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run create a key");
let db_conn = get_migrated_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::CreateUser(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_migrated_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::CountUsers(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::Proxyd(x) => {
let top_config = top_config.expect("--config is required to run proxyd");
x.main(top_config, num_workers).await
}
SubCommand::DropMigrationLock(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
// very intentionally, do NOT run migrations here
let db_conn = get_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::Pagerduty(x) => {
if cli_config.sentry_url.is_none() {
warn!("sentry_url is not set! Logs will only show in this console");
}
x.main(pagerduty_async, top_config).await
}
SubCommand::Sentryd(x) => {
if cli_config.sentry_url.is_none() {
warn!("sentry_url is not set! Logs will only show in this console");
}
x.main(pagerduty_async, top_config).await
}
SubCommand::RpcAccounting(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_migrated_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::TransferKey(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::UserExport(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_migrated_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
SubCommand::UserImport(x) => {
let db_url = cli_config
.db_url
.expect("'--config' (with a db) or '--db-url' is required to run proxyd");
let db_conn = get_migrated_db(db_url, 1, 1).await?;
x.main(&db_conn).await
}
}
})
}
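
Hypothetical invocations of the combined binary after this refactor; `$CONFIG_TOML` is a placeholder path and the env var matches the new RUST_LOG fallback above:

```
# run the proxy daemon, reading db/sentry settings from the config
web3_proxy_cli --config "$CONFIG_TOML" proxyd

# same, with more worker threads and the verbose log filter enabled
WEB3_PROXY_TRACE=true web3_proxy_cli --workers 4 --config "$CONFIG_TOML" proxyd
```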

@ -0,0 +1,88 @@
use argh::FromArgs;
use log::{error, info};
use pagerduty_rs::{eventsv2async::EventsV2 as PagerdutyAsyncEventsV2, types::Event};
use web3_proxy::{
config::TopConfig,
pagerduty::{pagerduty_alert, pagerduty_alert_for_config},
};
#[derive(FromArgs, PartialEq, Debug, Eq)]
/// Quickly create a pagerduty alert
#[argh(subcommand, name = "pagerduty")]
pub struct PagerdutySubCommand {
#[argh(positional)]
/// short description of the alert
summary: String,
/// the chain id to require. Only used if not using --config.
#[argh(option)]
chain_id: Option<u64>,
#[argh(option)]
/// the class/type of the event
class: Option<String>,
#[argh(option)]
/// the component of the event
component: Option<String>,
#[argh(option)]
/// deduplicate alerts based on this key.
/// If there are no open incidents with this key, a new incident will be created.
/// If there is an open incident with a matching key, the new event will be appended to that incident's Alerts log as an additional Trigger log entry.
dedup_key: Option<String>,
}
impl PagerdutySubCommand {
pub async fn main(
self,
pagerduty_async: Option<PagerdutyAsyncEventsV2>,
top_config: Option<TopConfig>,
) -> anyhow::Result<()> {
// TODO: allow customizing severity
let event = top_config
.map(|top_config| {
pagerduty_alert_for_config(
self.class.clone(),
self.component.clone(),
None::<()>,
pagerduty_rs::types::Severity::Error,
self.summary.clone(),
None,
top_config,
)
})
.unwrap_or_else(|| {
pagerduty_alert(
self.chain_id,
self.class,
None,
None,
self.component,
None::<()>,
pagerduty_rs::types::Severity::Error,
None,
self.summary,
None,
)
});
if let Some(pagerduty_async) = pagerduty_async {
info!(
"sending to pagerduty: {}",
serde_json::to_string_pretty(&event)?
);
if let Err(err) = pagerduty_async.event(Event::AlertTrigger(event)).await {
error!("Failed sending to pagerduty: {}", err);
}
} else {
info!(
"would send to pagerduty if PAGERDUTY_INTEGRATION_KEY were set: {}",
serde_json::to_string_pretty(&event)?
);
}
Ok(())
}
}
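
A hypothetical manual trigger; the event is only sent if PAGERDUTY_INTEGRATION_KEY is set in the environment (otherwise it is just logged), and `$CONFIG_TOML` and the class/component/dedup values are placeholders:

```
web3_proxy_cli --config "$CONFIG_TOML" pagerduty "backend rpcs are lagging" \
  --class infra --component web3-proxy --dedup-key lagging-rpcs
```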

@ -12,6 +12,8 @@ use migration::{
},
Condition,
};
use serde::Serialize;
use serde_json::json;
/// count requests
#[derive(FromArgs, PartialEq, Debug, Eq)]
@ -37,7 +39,7 @@ pub struct RpcAccountingSubCommand {
impl RpcAccountingSubCommand {
pub async fn main(self, db_conn: &DatabaseConnection) -> anyhow::Result<()> {
#[derive(Debug, FromQueryResult)]
#[derive(Serialize, FromQueryResult)]
struct SelectResult {
total_frontend_requests: Decimal,
// pub total_backend_retries: Decimal,
@ -137,8 +139,9 @@ impl RpcAccountingSubCommand {
.context("no query result")?;
info!(
"query_response for chain {:?}: {:#?}",
self.chain_id, query_response
"query_response for chain {:?}: {:#}",
self.chain_id,
json!(query_response)
);
// let query_seconds: Decimal = query_response

@ -0,0 +1,256 @@
use anyhow::{anyhow, Context};
use chrono::{DateTime, Utc};
use ethers::types::{Block, TxHash, H256};
use futures::{stream::FuturesUnordered, StreamExt};
use log::{debug, warn};
use serde::{Deserialize, Serialize};
use serde_json::json;
use web3_proxy::jsonrpc::JsonRpcErrorData;
use super::{SentrydErrorBuilder, SentrydResult};
#[derive(Debug, Deserialize, Serialize)]
struct JsonRpcResponse<V> {
// pub jsonrpc: String,
// pub id: Box<RawValue>,
#[serde(skip_serializing_if = "Option::is_none")]
pub result: Option<V>,
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<JsonRpcErrorData>,
}
#[derive(Serialize, Ord, PartialEq, PartialOrd, Eq)]
struct AbbreviatedBlock {
pub num: u64,
pub time: DateTime<Utc>,
pub hash: H256,
}
impl From<Block<TxHash>> for AbbreviatedBlock {
fn from(x: Block<TxHash>) -> Self {
Self {
num: x.number.unwrap().as_u64(),
hash: x.hash.unwrap(),
time: x.time().unwrap(),
}
}
}
pub async fn main(
error_builder: SentrydErrorBuilder,
rpc: String,
others: Vec<String>,
max_age: i64,
max_lag: i64,
) -> SentrydResult {
let client = reqwest::Client::new();
let block_by_number_request = json!({
"jsonrpc": "2.0",
"id": "1",
"method": "eth_getBlockByNumber",
"params": ["latest", false],
});
let a = client
.post(&rpc)
.json(&block_by_number_request)
.send()
.await
.context(format!("error querying block from {}", rpc))
.map_err(|x| error_builder.build(x))?;
if !a.status().is_success() {
return error_builder.result(anyhow!("bad response from {}: {}", rpc, a.status()));
}
// TODO: capture response headers now in case of error. store them in the extra data on the pager duty alert
let headers = format!("{:#?}", a.headers());
let body = a
.text()
.await
.context(format!("failed parsing body from {}", rpc))
.map_err(|x| error_builder.build(x))?;
let a: JsonRpcResponse<Block<TxHash>> = serde_json::from_str(&body)
.context(format!("body: {}", body))
.context(format!("failed parsing json from {}", rpc))
.map_err(|x| error_builder.build(x))?;
let a = if let Some(block) = a.result {
block
} else if let Some(err) = a.error {
return error_builder.result(
anyhow::anyhow!("headers: {:#?}. err: {:#?}", headers, err)
.context(format!("jsonrpc error from {}: code {}", rpc, err.code)),
);
} else {
return error_builder
.result(anyhow!("{:#?}", a).context(format!("empty response from {}", rpc)));
};
// check the parent hash because the other rpcs might not be as far along as the primary rpc
let parent_hash = a.parent_hash;
let rpc_block = check_rpc(parent_hash, client.clone(), rpc.to_string())
.await
.context(format!("Error while querying primary rpc: {}", rpc))
.map_err(|err| error_builder.build(err))?;
let fs = FuturesUnordered::new();
for other in others.iter() {
let f = check_rpc(parent_hash, client.clone(), other.to_string());
fs.push(tokio::spawn(f));
}
let other_check: Vec<_> = fs.collect().await;
if other_check.is_empty() {
return error_builder.result(anyhow::anyhow!("No other RPCs to check!"));
}
// TODO: collect into a counter instead?
let mut newest_other = None;
for oc in other_check.iter() {
match oc {
Ok(Ok(x)) => newest_other = newest_other.max(Some(x)),
Ok(Err(err)) => warn!("failed checking other rpc: {:?}", err),
Err(err) => warn!("internal error checking other rpc: {:?}", err),
}
}
if let Some(newest_other) = newest_other {
let duration_since = newest_other
.time
.signed_duration_since(rpc_block.time)
.num_seconds();
match duration_since.abs().cmp(&max_lag) {
std::cmp::Ordering::Less | std::cmp::Ordering::Equal => {}
std::cmp::Ordering::Greater => match duration_since.cmp(&0) {
std::cmp::Ordering::Equal => {
unimplemented!("we already checked that they are not equal")
}
std::cmp::Ordering::Less => {
return error_builder.result(anyhow::anyhow!(
"Our RPC is too far ahead ({} s)! Something might be wrong.\n{:#}\nvs\n{:#}",
duration_since.abs(),
json!(rpc_block),
json!(newest_other),
).context(format!("{} is too far ahead", rpc)));
}
std::cmp::Ordering::Greater => {
return error_builder.result(
anyhow::anyhow!(
"Behind {} s!\n{:#}\nvs\n{:#}",
duration_since,
json!(rpc_block),
json!(newest_other),
)
.context(format!("{} is too far behind", rpc)),
);
}
},
}
let now = Utc::now();
let block_age = now
.signed_duration_since(newest_other.max(&rpc_block).time)
.num_seconds();
match block_age.abs().cmp(&max_age) {
std::cmp::Ordering::Less | std::cmp::Ordering::Equal => {}
std::cmp::Ordering::Greater => match block_age.cmp(&0) {
std::cmp::Ordering::Equal => unimplemented!(),
std::cmp::Ordering::Less => {
return error_builder.result(
anyhow::anyhow!(
"Clock is behind {}s! Something might be wrong.\n{:#}\nvs\n{:#}",
block_age.abs(),
json!(now),
json!(newest_other),
)
.context(format!("Clock is too far behind on {}!", rpc)),
);
}
std::cmp::Ordering::Greater => {
return error_builder.result(
anyhow::anyhow!(
"block is too old ({}s)!\n{:#}\nvs\n{:#}",
block_age,
json!(now),
json!(newest_other),
)
.context(format!("block is too old on {}!", rpc)),
);
}
},
}
} else {
return error_builder.result(anyhow::anyhow!("No other RPC times to check!"));
}
debug!("rpc comparison ok: {:#}", json!(rpc_block));
Ok(())
}
// i don't think we need a whole provider. a simple http request is easiest
async fn check_rpc(
block_hash: H256,
client: reqwest::Client,
rpc: String,
) -> anyhow::Result<AbbreviatedBlock> {
let block_by_hash_request = json!({
"jsonrpc": "2.0",
"id": "1",
"method": "eth_getBlockByHash",
"params": [block_hash, false],
});
let response = client
.post(&rpc)
.json(&block_by_hash_request)
.send()
.await
.context(format!("awaiting response from {}", rpc))?;
if !response.status().is_success() {
return Err(anyhow::anyhow!(
"bad response from {}: {}",
rpc,
response.status(),
));
}
let body = response
.text()
.await
.context(format!("failed parsing body from {}", rpc))?;
let response_json: JsonRpcResponse<Block<TxHash>> = serde_json::from_str(&body)
.context(format!("body: {}", body))
.context(format!("failed parsing json from {}", rpc))?;
if let Some(result) = response_json.result {
let abbreviated = AbbreviatedBlock::from(result);
debug!("{} has {:?}@{}", rpc, abbreviated.hash, abbreviated.num);
Ok(abbreviated)
} else if let Some(result) = response_json.error {
Err(anyhow!(
"jsonrpc error during check_rpc from {}: {:#}",
rpc,
json!(result),
))
} else {
Err(anyhow!(
"empty result during check_rpc from {}: {:#}",
rpc,
json!(response_json)
))
}
}
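
The same comparison can be reproduced by hand with the request body this module sends; `$RPC_URL` is a placeholder for any of the checked endpoints:

```
curl -s -X POST -H 'Content-Type: application/json' \
  --data '{"jsonrpc":"2.0","id":"1","method":"eth_getBlockByNumber","params":["latest",false]}' \
  "$RPC_URL"
```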

@ -0,0 +1,284 @@
mod compare;
mod simple;
use anyhow::Context;
use argh::FromArgs;
use futures::{
stream::{FuturesUnordered, StreamExt},
Future,
};
use log::{error, info};
use pagerduty_rs::{eventsv2async::EventsV2 as PagerdutyAsyncEventsV2, types::Event};
use serde_json::json;
use std::time::Duration;
use tokio::sync::mpsc;
use tokio::time::{interval, MissedTickBehavior};
use web3_proxy::{config::TopConfig, pagerduty::pagerduty_alert};
#[derive(FromArgs, PartialEq, Debug, Eq)]
/// Run health checks in a loop and send pagerduty alerts if any fail
#[argh(subcommand, name = "sentryd")]
pub struct SentrydSubCommand {
#[argh(positional)]
/// the main (HTTP only) web3-proxy being checked.
web3_proxy: String,
/// the chain id to require. Only used if not using --config.
#[argh(option)]
chain_id: Option<u64>,
#[argh(option)]
/// warning threshold for age of the best known head block
max_age: i64,
#[argh(option)]
/// warning threshold for the number of seconds between the main rpc's head block and the best other rpc's head block
max_lag: i64,
#[argh(option)]
/// other (HTTP only) rpcs to compare the main rpc to
other_rpc: Vec<String>,
#[argh(option)]
/// other (HTTP only) web3-proxies to compare the main rpc to
other_proxy: Vec<String>,
#[argh(option)]
/// how many seconds between running checks
seconds: Option<u64>,
}
#[derive(Debug)]
pub struct SentrydError {
/// The class/type of the event, for example ping failure or cpu load
class: String,
/// Errors will send a pagerduty alert. Other levels only log messages
level: log::Level,
/// A short summary that should be mostly static
summary: String,
/// Lots of detail about the error
extra: Option<serde_json::Value>,
}
/// helper for creating SentrydErrors
#[derive(Clone)]
pub struct SentrydErrorBuilder {
class: String,
level: log::Level,
}
impl SentrydErrorBuilder {
fn build(&self, err: anyhow::Error) -> SentrydError {
SentrydError {
class: self.class.to_owned(),
level: self.level.to_owned(),
summary: format!("{}", err),
extra: Some(json!(format!("{:#?}", err))),
}
}
fn result(&self, err: anyhow::Error) -> SentrydResult {
Err(self.build(err))
}
}
type SentrydResult = Result<(), SentrydError>;
impl SentrydSubCommand {
pub async fn main(
self,
pagerduty_async: Option<PagerdutyAsyncEventsV2>,
top_config: Option<TopConfig>,
) -> anyhow::Result<()> {
// sentry logging should already be configured
let chain_id = self
.chain_id
.or_else(|| top_config.map(|x| x.app.chain_id))
.context("--config or --chain-id required")?;
let primary_proxy = self.web3_proxy.trim_end_matches("/").to_string();
let other_proxy: Vec<_> = self
.other_proxy
.into_iter()
.map(|x| x.trim_end_matches("/").to_string())
.collect();
let other_rpc: Vec<_> = self
.other_rpc
.into_iter()
.map(|x| x.trim_end_matches("/").to_string())
.collect();
let seconds = self.seconds.unwrap_or(60);
let mut handles = FuturesUnordered::new();
// channels and a task for sending errors to logs/pagerduty
let (error_sender, mut error_receiver) = mpsc::channel::<SentrydError>(10);
{
let error_handler_f = async move {
if pagerduty_async.is_none() {
info!("set PAGERDUTY_INTEGRATION_KEY to send create alerts for errors");
}
while let Some(err) = error_receiver.recv().await {
log::log!(err.level, "check failed: {:#?}", err);
if matches!(err.level, log::Level::Error) {
let alert = pagerduty_alert(
Some(chain_id),
Some(err.class),
Some("web3-proxy-sentry".to_string()),
None,
None,
err.extra,
pagerduty_rs::types::Severity::Error,
None,
err.summary,
None,
);
if let Some(pagerduty_async) = pagerduty_async.as_ref() {
info!(
"sending to pagerduty: {:#}",
serde_json::to_string_pretty(&alert)?
);
if let Err(err) =
pagerduty_async.event(Event::AlertTrigger(alert)).await
{
error!("Failed sending to pagerduty: {:#?}", err);
}
}
}
}
Ok(())
};
handles.push(tokio::spawn(error_handler_f));
}
// spawn a bunch of health check loops that do their checks on an interval
// check the main rpc's /health endpoint
{
let url = if primary_proxy.contains("/rpc/") {
let x = primary_proxy.split("/rpc/").next().unwrap();
format!("{}/health", x)
} else {
format!("{}/health", primary_proxy)
};
let error_sender = error_sender.clone();
// TODO: what timeout?
let timeout = Duration::from_secs(5);
let loop_f = a_loop(
"main /health",
seconds,
log::Level::Error,
error_sender,
move |error_builder| simple::main(error_builder, url.clone(), timeout),
);
handles.push(tokio::spawn(loop_f));
}
// check any other web3-proxy /health endpoints
for other_web3_proxy in other_proxy.iter() {
let url = if other_web3_proxy.contains("/rpc/") {
let x = other_web3_proxy.split("/rpc/").next().unwrap();
format!("{}/health", x)
} else {
format!("{}/health", other_web3_proxy)
};
let error_sender = error_sender.clone();
// TODO: what timeout?
let timeout = Duration::from_secs(5);
let loop_f = a_loop(
"other /health",
seconds,
log::Level::Warn,
error_sender,
move |error_builder| simple::main(error_builder, url.clone(), timeout),
);
handles.push(tokio::spawn(loop_f));
}
// compare the main web3-proxy head block to all web3-proxies and rpcs
{
let max_age = self.max_age;
let max_lag = self.max_lag;
let primary_proxy = primary_proxy.clone();
let error_sender = error_sender.clone();
let mut others = other_proxy.clone();
others.extend(other_rpc);
let loop_f = a_loop(
"head block comparison",
seconds,
log::Level::Error,
error_sender,
move |error_builder| {
compare::main(
error_builder,
primary_proxy.clone(),
others.clone(),
max_age,
max_lag,
)
},
);
handles.push(tokio::spawn(loop_f));
}
// wait for any returned values (if everything is working, they will all run forever)
while let Some(x) = handles.next().await {
// any errors that make it here will end the program
x??;
}
Ok(())
}
}
async fn a_loop<T>(
class: &str,
seconds: u64,
error_level: log::Level,
error_sender: mpsc::Sender<SentrydError>,
f: impl Fn(SentrydErrorBuilder) -> T,
) -> anyhow::Result<()>
where
T: Future<Output = SentrydResult> + Send + 'static,
{
let error_builder = SentrydErrorBuilder {
class: class.to_owned(),
level: error_level,
};
let mut interval = interval(Duration::from_secs(seconds));
// TODO: should we warn if there are delays?
interval.set_missed_tick_behavior(MissedTickBehavior::Delay);
loop {
interval.tick().await;
if let Err(err) = f(error_builder.clone()).await {
error_sender.send(err).await?;
};
}
}
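
A hypothetical sentryd invocation watching one proxy against two other endpoints; `$SENTRY_URL`, `$PROXY_URL`, `$RPC_URL_A`, and `$RPC_URL_B` are placeholders:

```
web3_proxy_cli --sentry-url "$SENTRY_URL" sentryd "$PROXY_URL" \
  --chain-id 1 --max-age 300 --max-lag 60 \
  --other-rpc "$RPC_URL_A" --other-rpc "$RPC_URL_B" \
  --seconds 60
```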

@ -0,0 +1,60 @@
use std::time::Duration;
use super::{SentrydErrorBuilder, SentrydResult};
use anyhow::Context;
use log::{debug, trace};
use tokio::time::Instant;
/// GET the url and return an error if it wasn't a success
pub async fn main(
error_builder: SentrydErrorBuilder,
url: String,
timeout: Duration,
) -> SentrydResult {
let start = Instant::now();
let r = reqwest::get(&url)
.await
.context(format!("Failed GET {}", &url))
.map_err(|x| error_builder.build(x))?;
let elapsed = start.elapsed();
if elapsed > timeout {
return error_builder.result(
anyhow::anyhow!(
"query took longer than {}ms ({}ms): {:#?}",
timeout.as_millis(),
elapsed.as_millis(),
r
)
.context(format!("fetching {} took too long", &url)),
);
}
// TODO: what should we do if we get rate limited here?
if r.status().is_success() {
debug!("{} is healthy", &url);
trace!("Successful {:#?}", r);
return Ok(());
}
// TODO: capture headers? or is that already part of r?
let detail = format!("{:#?}", r);
let summary = format!("{} is unhealthy: {}", &url, r.status());
let body = r
.text()
.await
.context(detail.clone())
.context(summary.clone())
.map_err(|x| error_builder.build(x))?;
error_builder.result(
anyhow::anyhow!("body: {}", body)
.context(detail)
.context(summary),
)
}
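
The same probe can be approximated from a shell; `$PROXY_URL` is a placeholder, and unlike this function, curl does not enforce the timeout or capture the failing body:

```
curl -sS -o /dev/null -w '%{http_code} %{time_total}s\n' "$PROXY_URL/health"
```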

@ -4,6 +4,7 @@ use crate::rpcs::request::OpenRequestHandleMetrics;
use crate::{app::AnyhowJoinHandle, rpcs::blockchain::ArcBlock};
use argh::FromArgs;
use ethers::prelude::TxHash;
use ethers::types::U256;
use hashbrown::HashMap;
use log::warn;
use migration::sea_orm::DatabaseConnection;
@ -38,7 +39,7 @@ pub struct CliConfig {
pub cookie_key_filename: String,
}
#[derive(Debug, Deserialize)]
#[derive(Clone, Debug, Deserialize)]
pub struct TopConfig {
pub app: AppConfig,
pub balanced_rpcs: HashMap<String, Web3ConnectionConfig>,
@ -51,7 +52,7 @@ pub struct TopConfig {
/// shared configuration between Web3Connections
// TODO: no String, only &str
#[derive(Debug, Default, Deserialize)]
#[derive(Clone, Debug, Default, Deserialize)]
pub struct AppConfig {
/// Request limit for allowed origins for anonymous users.
/// These requests get rate limited by IP.
@ -90,6 +91,12 @@ pub struct AppConfig {
/// None = allow all requests
pub default_user_max_requests_per_period: Option<u64>,
/// minimum amount to increase eth_estimateGas results
pub gas_increase_min: Option<U256>,
/// percentage to increase eth_estimateGas results. 100 == 100%
pub gas_increase_percent: Option<U256>,
/// Restrict user registration.
/// None = no code needed
pub invite_code: Option<String>,
@ -183,7 +190,7 @@ fn default_response_cache_max_bytes() -> usize {
}
/// Configuration for a backend web3 RPC server
#[derive(Debug, Deserialize)]
#[derive(Clone, Debug, Deserialize)]
pub struct Web3ConnectionConfig {
/// simple way to disable a connection without deleting the row
#[serde(default)]
@ -198,6 +205,8 @@ pub struct Web3ConnectionConfig {
pub soft_limit: u32,
/// the requests per second at which the server throws errors (rate limit or otherwise)
pub hard_limit: Option<u64>,
/// only use this rpc if everything else is lagging too far. this allows us to ignore fast but very low limit rpcs
pub backup: Option<bool>,
/// All else equal, a server with a lower tier receives all requests
#[serde(default = "default_tier")]
pub tier: u64,
@ -221,7 +230,6 @@ impl Web3ConnectionConfig {
pub async fn spawn(
self,
name: String,
allowed_lag: u64,
db_conn: Option<DatabaseConnection>,
redis_pool: Option<redis_rate_limiter::RedisPool>,
chain_id: u64,
@ -256,9 +264,10 @@ impl Web3ConnectionConfig {
None
};
let backup = self.backup.unwrap_or(false);
Web3Connection::spawn(
name,
allowed_lag,
self.display_name,
chain_id,
db_conn,
@ -267,6 +276,7 @@ impl Web3ConnectionConfig {
http_interval_sender,
hard_limit,
self.soft_limit,
backup,
self.block_data_limit,
block_map,
block_sender,

@ -85,6 +85,7 @@ pub struct RequestMetadata {
pub error_response: AtomicBool,
pub response_bytes: AtomicU64,
pub response_millis: AtomicU64,
pub response_from_backup_rpc: AtomicBool,
}
impl RequestMetadata {
@ -103,6 +104,7 @@ impl RequestMetadata {
error_response: false.into(),
response_bytes: 0.into(),
response_millis: 0.into(),
response_from_backup_rpc: false.into(),
};
Ok(new)
@ -660,13 +662,11 @@ impl Web3ProxyApp {
let db_replica = self.db_replica().context("Getting database connection")?;
let rpc_secret_key: Uuid = rpc_secret_key.into();
// TODO: join the user table to this to return the User? we don't always need it
// TODO: join on secondary users
// TODO: join on user tier
match rpc_key::Entity::find()
.filter(rpc_key::Column::SecretKey.eq(rpc_secret_key))
.filter(rpc_key::Column::SecretKey.eq(<Uuid>::from(rpc_secret_key)))
.filter(rpc_key::Column::Active.eq(true))
.one(db_replica.conn())
.await?
@ -741,7 +741,8 @@ impl Web3ProxyApp {
Ok(AuthorizationChecks {
user_id: rpc_key_model.user_id,
rpc_key_id,
rpc_secret_key: Some(rpc_secret_key),
rpc_secret_key_id: rpc_key_id,
allowed_ips,
allowed_origins,
allowed_referers,
@ -774,7 +775,7 @@ impl Web3ProxyApp {
let authorization_checks = self.authorization_checks(rpc_key).await?;
// if no rpc_key_id matching the given rpc was found, then we can't rate limit by key
if authorization_checks.rpc_key_id.is_none() {
if authorization_checks.rpc_secret_key_id.is_none() {
return Ok(RateLimitResult::UnknownKey);
}
@ -845,3 +846,29 @@ impl Web3ProxyApp {
}
}
}
impl Authorization {
pub async fn check_again(
&self,
app: &Arc<Web3ProxyApp>,
) -> Result<(Arc<Self>, Option<OwnedSemaphorePermit>), FrontendErrorResponse> {
// TODO: we could probably do this without clones. but this is easy
let (a, s) = if let Some(rpc_secret_key) = self.checks.rpc_secret_key {
key_is_authorized(
app,
rpc_secret_key,
self.ip,
self.origin.clone(),
self.referer.clone(),
self.user_agent.clone(),
)
.await?
} else {
ip_is_authorized(app, self.ip, self.origin.clone()).await?
};
let a = Arc::new(a);
Ok((a, s))
}
}

@ -35,7 +35,6 @@ pub enum FrontendErrorResponse {
NotFound,
RateLimited(Authorization, Option<Instant>),
Redis(RedisError),
Response(Response),
/// simple way to return an error message to the user and an anyhow to our logs
StatusCode(StatusCode, String, Option<anyhow::Error>),
/// TODO: what should be attached to the timout?
@ -44,11 +43,9 @@ pub enum FrontendErrorResponse {
UnknownKey,
}
impl IntoResponse for FrontendErrorResponse {
fn into_response(self) -> Response {
// TODO: include the request id in these so that users can give us something that will point to logs
// TODO: status code is in the jsonrpc response and is also the first item in the tuple. DRY
let (status_code, response) = match self {
impl FrontendErrorResponse {
pub fn into_response_parts(self) -> (StatusCode, JsonRpcForwardedResponse) {
match self {
Self::AccessDenied => {
// TODO: attach something to this trace. probably don't include much in the message though. don't want to leak creds by accident
trace!("access denied");
@ -174,12 +171,12 @@ impl IntoResponse for FrontendErrorResponse {
};
// create a string with either the IP or the rpc_key_id
let msg = if authorization.checks.rpc_key_id.is_none() {
let msg = if authorization.checks.rpc_secret_key_id.is_none() {
format!("too many requests from {}.{}", authorization.ip, retry_msg)
} else {
format!(
"too many requests from rpc key #{}.{}",
authorization.checks.rpc_key_id.unwrap(),
authorization.checks.rpc_secret_key_id.unwrap(),
retry_msg
)
};
@ -204,10 +201,6 @@ impl IntoResponse for FrontendErrorResponse {
),
)
}
Self::Response(r) => {
debug_assert_ne!(r.status(), StatusCode::OK);
return r;
}
Self::SemaphoreAcquireError(err) => {
warn!("semaphore acquire err={:?}", err);
(
@ -274,7 +267,15 @@ impl IntoResponse for FrontendErrorResponse {
None,
),
),
};
}
}
}
impl IntoResponse for FrontendErrorResponse {
fn into_response(self) -> Response {
// TODO: include the request id in these so that users can give us something that will point to logs
// TODO: status code is in the jsonrpc response and is also the first item in the tuple. DRY
let (status_code, response) = self.into_response_parts();
(status_code, Json(response)).into_response()
}

@ -41,28 +41,102 @@ pub async fn serve(port: u16, proxy_app: Arc<Web3ProxyApp>) -> anyhow::Result<()
.time_to_live(Duration::from_secs(1))
.build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default());
// TODO: read config for if fastest/versus should be available publicly. default off
// build our axum Router
let app = Router::new()
// routes should be ordered most to least common
// TODO: i think these routes could be done a lot better
//
// HTTP RPC (POST)
//
// public
.route("/", post(rpc_proxy_http::proxy_web3_rpc))
// authenticated with and without trailing slash
.route(
"/rpc/:rpc_key/",
post(rpc_proxy_http::proxy_web3_rpc_with_key),
)
.route(
"/rpc/:rpc_key",
post(rpc_proxy_http::proxy_web3_rpc_with_key),
)
// public fastest with and without trailing slash
.route("/fastest/", post(rpc_proxy_http::fastest_proxy_web3_rpc))
.route("/fastest", post(rpc_proxy_http::fastest_proxy_web3_rpc))
// authenticated fastest with and without trailing slash
.route(
"/fastest/:rpc_key/",
post(rpc_proxy_http::fastest_proxy_web3_rpc_with_key),
)
.route(
"/fastest/:rpc_key",
post(rpc_proxy_http::fastest_proxy_web3_rpc_with_key),
)
// public versus
.route("/versus/", post(rpc_proxy_http::versus_proxy_web3_rpc))
.route("/versus", post(rpc_proxy_http::versus_proxy_web3_rpc))
// authenticated versus with and without trailing slash
.route(
"/versus/:rpc_key/",
post(rpc_proxy_http::versus_proxy_web3_rpc_with_key),
)
.route(
"/versus/:rpc_key",
post(rpc_proxy_http::versus_proxy_web3_rpc_with_key),
)
//
// Websocket RPC (GET)
// If not an RPC, this will redirect to configurable urls
//
// public
.route("/", get(rpc_proxy_ws::websocket_handler))
.route(
"/rpc/:rpc_key",
post(rpc_proxy_http::proxy_web3_rpc_with_key),
)
// authenticated with and without trailing slash
.route(
"/rpc/:rpc_key/",
post(rpc_proxy_http::proxy_web3_rpc_with_key),
get(rpc_proxy_ws::websocket_handler_with_key),
)
.route(
"/rpc/:rpc_key",
get(rpc_proxy_ws::websocket_handler_with_key),
)
// public fastest with and without trailing slash
.route("/fastest/", get(rpc_proxy_ws::fastest_websocket_handler))
.route("/fastest", get(rpc_proxy_ws::fastest_websocket_handler))
// authenticated fastest with and without trailing slash
.route(
"/rpc/:rpc_key/",
get(rpc_proxy_ws::websocket_handler_with_key),
"/fastest/:rpc_key/",
get(rpc_proxy_ws::fastest_websocket_handler_with_key),
)
.route(
"/fastest/:rpc_key",
get(rpc_proxy_ws::fastest_websocket_handler_with_key),
)
// public versus
.route(
"/versus/",
get(rpc_proxy_ws::versus_websocket_handler),
)
.route(
"/versus",
get(rpc_proxy_ws::versus_websocket_handler),
)
// authenticated versus with and without trailing slash
.route(
"/versus/:rpc_key/",
get(rpc_proxy_ws::versus_websocket_handler_with_key),
)
.route(
"/versus/:rpc_key",
get(rpc_proxy_ws::versus_websocket_handler_with_key),
)
//
// System things
//
.route("/health", get(status::health))
.route("/status", get(status::status))
//
// User stuff
//
.route("/user/login/:user_address", get(users::user_login_get))
.route(
"/user/login/:user_address/:message_eip",
@ -88,9 +162,11 @@ pub async fn serve(port: u16, proxy_app: Arc<Web3ProxyApp>) -> anyhow::Result<()
.route("/user/stats/detailed", get(users::user_stats_detailed_get))
.route("/admin/modify_role", get(admin::admin_change_user_roles))
.route("/user/logout", post(users::user_logout_post))
.route("/status", get(status::status))
//
// Axum layers
// layers are ordered bottom up
// the last layer is first for requests and last for responses
//
// Mark the `Authorization` request header as sensitive so it doesn't show in logs
.layer(SetSensitiveRequestHeadersLayer::new(once(AUTHORIZATION)))
// handle cors
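
Hedged examples of hitting the new routes once proxyd is listening; `$PROXY_URL` and `$RPC_KEY` are placeholders:

```
# public "fastest" proxying
curl -s -X POST -H 'Content-Type: application/json' \
  --data '{"jsonrpc":"2.0","id":1,"method":"eth_blockNumber","params":[]}' \
  "$PROXY_URL/fastest"

# authenticated "versus" proxying
curl -s -X POST -H 'Content-Type: application/json' \
  --data '{"jsonrpc":"2.0","id":1,"method":"eth_blockNumber","params":[]}' \
  "$PROXY_URL/versus/$RPC_KEY"
```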

@ -2,6 +2,7 @@
use super::authorization::{ip_is_authorized, key_is_authorized};
use super::errors::FrontendResult;
use super::rpc_proxy_ws::ProxyMode;
use crate::{app::Web3ProxyApp, jsonrpc::JsonRpcRequestEnum};
use axum::extract::Path;
use axum::headers::{Origin, Referer, UserAgent};
@ -18,9 +19,41 @@ use std::sync::Arc;
#[debug_handler]
pub async fn proxy_web3_rpc(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ClientIp(ip): ClientIp,
ip: ClientIp,
origin: Option<TypedHeader<Origin>>,
Json(payload): Json<JsonRpcRequestEnum>,
) -> FrontendResult {
_proxy_web3_rpc(app, ip, origin, payload, ProxyMode::Best).await
}
#[debug_handler]
pub async fn fastest_proxy_web3_rpc(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ip: ClientIp,
origin: Option<TypedHeader<Origin>>,
Json(payload): Json<JsonRpcRequestEnum>,
) -> FrontendResult {
// TODO: read the fastest number from params
// TODO: check that the app allows this without authentication
_proxy_web3_rpc(app, ip, origin, payload, ProxyMode::Fastest(0)).await
}
#[debug_handler]
pub async fn versus_proxy_web3_rpc(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ip: ClientIp,
origin: Option<TypedHeader<Origin>>,
Json(payload): Json<JsonRpcRequestEnum>,
) -> FrontendResult {
_proxy_web3_rpc(app, ip, origin, payload, ProxyMode::Versus).await
}
async fn _proxy_web3_rpc(
app: Arc<Web3ProxyApp>,
ClientIp(ip): ClientIp,
origin: Option<TypedHeader<Origin>>,
payload: JsonRpcRequestEnum,
proxy_mode: ProxyMode,
) -> FrontendResult {
// TODO: benchmark spawning this
// TODO: do we care about keeping the TypedHeader wrapper?
@ -31,7 +64,7 @@ pub async fn proxy_web3_rpc(
let authorization = Arc::new(authorization);
let (response, rpcs, _semaphore) = app
.proxy_web3_rpc(authorization, payload)
.proxy_web3_rpc(authorization, payload, proxy_mode)
.await
.map(|(x, y)| (x, y, semaphore))?;
@ -58,12 +91,82 @@ pub async fn proxy_web3_rpc(
#[debug_handler]
pub async fn proxy_web3_rpc_with_key(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ClientIp(ip): ClientIp,
ip: ClientIp,
origin: Option<TypedHeader<Origin>>,
referer: Option<TypedHeader<Referer>>,
user_agent: Option<TypedHeader<UserAgent>>,
Path(rpc_key): Path<String>,
Json(payload): Json<JsonRpcRequestEnum>,
) -> FrontendResult {
_proxy_web3_rpc_with_key(
app,
ip,
origin,
referer,
user_agent,
rpc_key,
payload,
ProxyMode::Best,
)
.await
}
#[debug_handler]
pub async fn fastest_proxy_web3_rpc_with_key(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ip: ClientIp,
origin: Option<TypedHeader<Origin>>,
referer: Option<TypedHeader<Referer>>,
user_agent: Option<TypedHeader<UserAgent>>,
Path(rpc_key): Path<String>,
Json(payload): Json<JsonRpcRequestEnum>,
) -> FrontendResult {
_proxy_web3_rpc_with_key(
app,
ip,
origin,
referer,
user_agent,
rpc_key,
payload,
ProxyMode::Fastest(0),
)
.await
}
#[debug_handler]
pub async fn versus_proxy_web3_rpc_with_key(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ip: ClientIp,
origin: Option<TypedHeader<Origin>>,
referer: Option<TypedHeader<Referer>>,
user_agent: Option<TypedHeader<UserAgent>>,
Path(rpc_key): Path<String>,
Json(payload): Json<JsonRpcRequestEnum>,
) -> FrontendResult {
_proxy_web3_rpc_with_key(
app,
ip,
origin,
referer,
user_agent,
rpc_key,
payload,
ProxyMode::Versus,
)
.await
}
#[allow(clippy::too_many_arguments)]
async fn _proxy_web3_rpc_with_key(
app: Arc<Web3ProxyApp>,
ClientIp(ip): ClientIp,
origin: Option<TypedHeader<Origin>>,
referer: Option<TypedHeader<Referer>>,
user_agent: Option<TypedHeader<UserAgent>>,
rpc_key: String,
payload: JsonRpcRequestEnum,
proxy_mode: ProxyMode,
) -> FrontendResult {
// TODO: DRY w/ proxy_web3_rpc
// the request can take a while, so we spawn so that we can start serving another request
@ -82,7 +185,7 @@ pub async fn proxy_web3_rpc_with_key(
let authorization = Arc::new(authorization);
let (response, rpcs, _semaphore) = app
.proxy_web3_rpc(authorization, payload)
.proxy_web3_rpc(authorization, payload, proxy_mode)
.await
.map(|(x, y)| (x, y, semaphore))?;

@ -32,11 +32,60 @@ use serde_json::json;
use serde_json::value::to_raw_value;
use std::sync::Arc;
use std::{str::from_utf8_mut, sync::atomic::AtomicUsize};
use tokio::sync::{broadcast, OwnedSemaphorePermit, RwLock};
#[derive(Copy, Clone)]
pub enum ProxyMode {
/// send to the "best" synced server
Best,
/// send to all synced servers and return the fastest non-error response (reverts do not count as errors here)
Fastest(usize),
/// send to all servers for benchmarking. return the fastest non-error response
Versus,
}
/// Public entrypoint for WebSocket JSON-RPC requests.
/// Queries a single server at a time
#[debug_handler]
pub async fn websocket_handler(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ip: ClientIp,
origin: Option<TypedHeader<Origin>>,
ws_upgrade: Option<WebSocketUpgrade>,
) -> FrontendResult {
_websocket_handler(ProxyMode::Best, app, ip, origin, ws_upgrade).await
}
/// Public entrypoint for WebSocket JSON-RPC requests that uses all synced servers.
/// Queries all synced backends with every request! This might get expensive!
#[debug_handler]
pub async fn fastest_websocket_handler(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ip: ClientIp,
origin: Option<TypedHeader<Origin>>,
ws_upgrade: Option<WebSocketUpgrade>,
) -> FrontendResult {
// TODO: get the fastest number from the url params (default to 0/all)
// TODO: config to disable this
_websocket_handler(ProxyMode::Fastest(0), app, ip, origin, ws_upgrade).await
}
/// Public entrypoint for WebSocket JSON-RPC requests that uses all servers.
/// Queries **all** backends with every request! This might get expensive!
#[debug_handler]
pub async fn versus_websocket_handler(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ip: ClientIp,
origin: Option<TypedHeader<Origin>>,
ws_upgrade: Option<WebSocketUpgrade>,
) -> FrontendResult {
// TODO: config to disable this
_websocket_handler(ProxyMode::Versus, app, ip, origin, ws_upgrade).await
}
async fn _websocket_handler(
proxy_mode: ProxyMode,
app: Arc<Web3ProxyApp>,
ClientIp(ip): ClientIp,
origin: Option<TypedHeader<Origin>>,
ws_upgrade: Option<WebSocketUpgrade>,
@ -49,7 +98,7 @@ pub async fn websocket_handler(
match ws_upgrade {
Some(ws) => Ok(ws
.on_upgrade(|socket| proxy_web3_socket(app, authorization, socket))
.on_upgrade(move |socket| proxy_web3_socket(app, authorization, socket, proxy_mode))
.into_response()),
None => {
if let Some(redirect) = &app.config.redirect_public_url {
@ -72,12 +121,83 @@ pub async fn websocket_handler(
#[debug_handler]
pub async fn websocket_handler_with_key(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ClientIp(ip): ClientIp,
ip: ClientIp,
Path(rpc_key): Path<String>,
origin: Option<TypedHeader<Origin>>,
referer: Option<TypedHeader<Referer>>,
user_agent: Option<TypedHeader<UserAgent>>,
ws_upgrade: Option<WebSocketUpgrade>,
) -> FrontendResult {
_websocket_handler_with_key(
ProxyMode::Best,
app,
ip,
rpc_key,
origin,
referer,
user_agent,
ws_upgrade,
)
.await
}
#[debug_handler]
pub async fn fastest_websocket_handler_with_key(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ip: ClientIp,
Path(rpc_key): Path<String>,
origin: Option<TypedHeader<Origin>>,
referer: Option<TypedHeader<Referer>>,
user_agent: Option<TypedHeader<UserAgent>>,
ws_upgrade: Option<WebSocketUpgrade>,
) -> FrontendResult {
// TODO: get the fastest number from the url params (default to 0/all)
_websocket_handler_with_key(
ProxyMode::Fastest(0),
app,
ip,
rpc_key,
origin,
referer,
user_agent,
ws_upgrade,
)
.await
}
#[debug_handler]
pub async fn versus_websocket_handler_with_key(
Extension(app): Extension<Arc<Web3ProxyApp>>,
ip: ClientIp,
Path(rpc_key): Path<String>,
origin: Option<TypedHeader<Origin>>,
referer: Option<TypedHeader<Referer>>,
user_agent: Option<TypedHeader<UserAgent>>,
ws_upgrade: Option<WebSocketUpgrade>,
) -> FrontendResult {
_websocket_handler_with_key(
ProxyMode::Versus,
app,
ip,
rpc_key,
origin,
referer,
user_agent,
ws_upgrade,
)
.await
}
#[allow(clippy::too_many_arguments)]
async fn _websocket_handler_with_key(
proxy_mode: ProxyMode,
app: Arc<Web3ProxyApp>,
ClientIp(ip): ClientIp,
rpc_key: String,
origin: Option<TypedHeader<Origin>>,
referer: Option<TypedHeader<Referer>>,
user_agent: Option<TypedHeader<UserAgent>>,
ws_upgrade: Option<WebSocketUpgrade>,
) -> FrontendResult {
let rpc_key = rpc_key.parse()?;
@ -96,9 +216,8 @@ pub async fn websocket_handler_with_key(
let authorization = Arc::new(authorization);
match ws_upgrade {
Some(ws_upgrade) => {
Ok(ws_upgrade.on_upgrade(move |socket| proxy_web3_socket(app, authorization, socket)))
}
Some(ws_upgrade) => Ok(ws_upgrade
.on_upgrade(move |socket| proxy_web3_socket(app, authorization, socket, proxy_mode))),
None => {
// if no websocket upgrade, this is probably a user loading the url with their browser
@ -107,7 +226,7 @@ pub async fn websocket_handler_with_key(
match (
&app.config.redirect_public_url,
&app.config.redirect_rpc_key_url,
authorization.checks.rpc_key_id,
authorization.checks.rpc_secret_key_id,
) {
(None, None, _) => Err(FrontendErrorResponse::StatusCode(
StatusCode::BAD_REQUEST,
@ -120,7 +239,7 @@ pub async fn websocket_handler_with_key(
(_, Some(redirect_rpc_key_url), rpc_key_id) => {
let reg = Handlebars::new();
if authorization.checks.rpc_key_id.is_none() {
if authorization.checks.rpc_secret_key_id.is_none() {
// i don't think this is possible
Err(FrontendErrorResponse::StatusCode(
StatusCode::UNAUTHORIZED,
@ -154,6 +273,7 @@ async fn proxy_web3_socket(
app: Arc<Web3ProxyApp>,
authorization: Arc<Authorization>,
socket: WebSocket,
proxy_mode: ProxyMode,
) {
// split the websocket so we can read and write concurrently
let (ws_tx, ws_rx) = socket.split();
@ -162,7 +282,13 @@ async fn proxy_web3_socket(
let (response_sender, response_receiver) = flume::unbounded::<Message>();
tokio::spawn(write_web3_socket(response_receiver, ws_tx));
tokio::spawn(read_web3_socket(app, authorization, ws_rx, response_sender));
tokio::spawn(read_web3_socket(
app,
authorization,
ws_rx,
response_sender,
proxy_mode,
));
}
/// websockets support a few more methods than http clients
@ -172,8 +298,20 @@ async fn handle_socket_payload(
payload: &str,
response_sender: &flume::Sender<Message>,
subscription_count: &AtomicUsize,
subscriptions: &mut HashMap<String, AbortHandle>,
) -> Message {
subscriptions: Arc<RwLock<HashMap<String, AbortHandle>>>,
proxy_mode: ProxyMode,
) -> (Message, Option<OwnedSemaphorePermit>) {
let (authorization, semaphore) = match authorization.check_again(&app).await {
Ok((a, s)) => (a, s),
Err(err) => {
let (_, err) = err.into_response_parts();
let err = serde_json::to_string(&err).expect("to_string should always work here");
return (Message::Text(err), None);
}
};
// TODO: do any clients send batches over websockets?
let (id, response) = match serde_json::from_str::<JsonRpcRequest>(payload) {
Ok(json_request) => {
@ -183,6 +321,7 @@ async fn handle_socket_payload(
[..]
{
"eth_subscribe" => {
// TODO: how can we subscribe with proxy_mode?
match app
.eth_subscribe(
authorization.clone(),
@ -194,7 +333,9 @@ async fn handle_socket_payload(
{
Ok((handle, response)) => {
// TODO: better key
subscriptions.insert(
let mut x = subscriptions.write().await;
x.insert(
response
.result
.as_ref()
@ -218,8 +359,10 @@ async fn handle_socket_payload(
let subscription_id = json_request.params.unwrap().to_string();
let mut x = subscriptions.write().await;
// TODO: is this the right response?
let partial_response = match subscriptions.remove(&subscription_id) {
let partial_response = match x.remove(&subscription_id) {
None => false,
Some(handle) => {
handle.abort();
@ -227,6 +370,8 @@ async fn handle_socket_payload(
}
};
drop(x);
let response =
JsonRpcForwardedResponse::from_value(json!(partial_response), id.clone());
@ -247,7 +392,7 @@ async fn handle_socket_payload(
Ok(response.into())
}
_ => app
.proxy_web3_rpc(authorization.clone(), json_request.into())
.proxy_web3_rpc(authorization.clone(), json_request.into(), proxy_mode)
.await
.map_or_else(
|err| match err {
@ -281,9 +426,7 @@ async fn handle_socket_payload(
}
};
// TODO: what error should this be?
Message::Text(response_str)
(Message::Text(response_str), semaphore)
}
async fn read_web3_socket(
@ -291,60 +434,99 @@ async fn read_web3_socket(
authorization: Arc<Authorization>,
mut ws_rx: SplitStream<WebSocket>,
response_sender: flume::Sender<Message>,
proxy_mode: ProxyMode,
) {
let mut subscriptions = HashMap::new();
let subscription_count = AtomicUsize::new(1);
// TODO: need a concurrent hashmap
let subscriptions = Arc::new(RwLock::new(HashMap::new()));
let subscription_count = Arc::new(AtomicUsize::new(1));
while let Some(Ok(msg)) = ws_rx.next().await {
// TODO: spawn this?
// new message from our client. forward to a backend and then send it through response_tx
let response_msg = match msg {
Message::Text(payload) => {
handle_socket_payload(
app.clone(),
&authorization,
&payload,
&response_sender,
&subscription_count,
&mut subscriptions,
)
.await
let (close_sender, mut close_receiver) = broadcast::channel(1);
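// note: subscriptions and subscription_count are behind Arc so every spawned per-message
// task below sees the same state; close_sender lets any of those tasks (on a Close frame
// or a failed send) tell this loop to exit via the select! on close_receiver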
loop {
tokio::select! {
msg = ws_rx.next() => {
if let Some(Ok(msg)) = msg {
// spawn so that we can serve responses from this loop even faster
// TODO: only do these clones if the msg is text/binary?
let close_sender = close_sender.clone();
let app = app.clone();
let authorization = authorization.clone();
let response_sender = response_sender.clone();
let subscriptions = subscriptions.clone();
let subscription_count = subscription_count.clone();
let f = async move {
let mut _semaphore = None;
// new message from our client. forward to a backend and then send it through response_tx
let response_msg = match msg {
Message::Text(payload) => {
let (msg, s) = handle_socket_payload(
app.clone(),
&authorization,
&payload,
&response_sender,
&subscription_count,
subscriptions,
proxy_mode,
)
.await;
_semaphore = s;
msg
}
Message::Ping(x) => {
trace!("ping: {:?}", x);
Message::Pong(x)
}
Message::Pong(x) => {
trace!("pong: {:?}", x);
return;
}
Message::Close(_) => {
info!("closing websocket connection");
// TODO: do something to close subscriptions?
let _ = close_sender.send(true);
return;
}
Message::Binary(mut payload) => {
let payload = from_utf8_mut(&mut payload).unwrap();
let (msg, s) = handle_socket_payload(
app.clone(),
&authorization,
payload,
&response_sender,
&subscription_count,
subscriptions,
proxy_mode,
)
.await;
_semaphore = s;
msg
}
};
if response_sender.send_async(response_msg).await.is_err() {
let _ = close_sender.send(true);
return;
};
_semaphore = None;
};
tokio::spawn(f);
} else {
break;
}
}
Message::Ping(x) => {
trace!("ping: {:?}", x);
Message::Pong(x)
}
Message::Pong(x) => {
trace!("pong: {:?}", x);
continue;
}
Message::Close(_) => {
info!("closing websocket connection");
_ = close_receiver.recv() => {
break;
}
Message::Binary(mut payload) => {
// TODO: poke rate limit for the user/ip
let payload = from_utf8_mut(&mut payload).unwrap();
handle_socket_payload(
app.clone(),
&authorization,
payload,
&response_sender,
&subscription_count,
&mut subscriptions,
)
.await
}
};
match response_sender.send_async(response_msg).await {
Ok(_) => {}
Err(err) => {
error!("{}", err);
break;
}
};
}
}
}

@ -4,7 +4,7 @@
//! They will eventually move to another port.
use super::{FrontendResponseCache, FrontendResponseCaches};
use crate::app::Web3ProxyApp;
use crate::app::{Web3ProxyApp, APP_USER_AGENT};
use axum::{http::StatusCode, response::IntoResponse, Extension, Json};
use axum_macros::debug_handler;
use serde_json::json;
@ -33,6 +33,7 @@ pub async fn status(
.get_with(FrontendResponseCaches::Status, async {
// TODO: what else should we include? uptime, cache hit rates, cpu load, memory used
let body = json!({
"version": APP_USER_AGENT,
"chain_id": app.config.chain_id,
"balanced_rpcs": app.balanced_rpcs,
"private_rpcs": app.private_rpcs,

@ -7,6 +7,7 @@ pub mod frontend;
pub mod jsonrpc;
pub mod metered;
pub mod metrics_frontend;
pub mod pagerduty;
pub mod rpcs;
pub mod user_queries;
pub mod user_token;

web3_proxy/src/pagerduty.rs (new file, 191 lines)

@ -0,0 +1,191 @@
use crate::config::TopConfig;
use gethostname::gethostname;
use log::{debug, error};
use pagerduty_rs::eventsv2sync::EventsV2 as PagerdutySyncEventsV2;
use pagerduty_rs::types::{AlertTrigger, AlertTriggerPayload, Event};
use serde::Serialize;
use std::{
collections::hash_map::DefaultHasher,
hash::{Hash, Hasher},
panic::PanicInfo,
};
use time::OffsetDateTime;
/*
let client = top_config
.as_ref()
.map(|top_config| format!("web3-proxy chain #{}", top_config.app.chain_id))
.unwrap_or_else(|| format!("web3-proxy w/o chain"));
let client_url = top_config
.as_ref()
.and_then(|x| x.app.redirect_public_url.clone());
panic::set_hook(Box::new(move |x| {
let hostname = gethostname().into_string().unwrap_or("unknown".to_string());
let panic_msg = format!("{} {:?}", x, x);
if panic_msg.starts_with("panicked at 'WS Server panic") {
info!("Underlying library {}", panic_msg);
} else {
error!("sending panic to pagerduty: {}", panic_msg);
let mut s = DefaultHasher::new();
panic_msg.hash(&mut s);
panic_msg.hash(&mut s);
let dedup_key = s.finish().to_string();
let payload = AlertTriggerPayload {
severity: pagerduty_rs::types::Severity::Error,
summary: panic_msg,
source: hostname,
timestamp: None,
component: None,
group: Some("web3-proxy".to_string()),
class: Some("panic".to_string()),
custom_details: None::<()>,
};
let event = Event::AlertTrigger(AlertTrigger {
payload,
dedup_key: Some(dedup_key),
images: None,
links: None,
client: Some(client.clone()),
client_url: client_url.clone(),
});
if let Err(err) = pagerduty_sync.event(event) {
error!("Failed sending panic to pagerduty: {}", err);
}
}
}));
*/
pub fn panic_handler(
top_config: Option<TopConfig>,
pagerduty_sync: &PagerdutySyncEventsV2,
panic_info: &PanicInfo,
) {
let summary = format!("{}", panic_info);
let details = format!("{:#?}", panic_info);
if summary.starts_with("panicked at 'WS Server panic") {
// the ethers-rs library panics when websockets disconnect. this isn't a panic we care about reporting
debug!("Underlying library {}", details);
return;
}
let class = Some("panic".to_string());
let alert = if let Some(top_config) = top_config {
pagerduty_alert_for_config(
class,
None,
Some(details),
pagerduty_rs::types::Severity::Critical,
summary,
None,
top_config,
)
} else {
pagerduty_alert(
None,
class,
None,
None,
None,
Some(details),
pagerduty_rs::types::Severity::Critical,
None,
summary,
None,
)
};
let event = Event::AlertTrigger(alert);
if let Err(err) = pagerduty_sync.event(event) {
error!("Failed sending alert to pagerduty! {:#?}", err);
}
}
pub fn pagerduty_alert_for_config<T: Serialize>(
class: Option<String>,
component: Option<String>,
custom_details: Option<T>,
severity: pagerduty_rs::types::Severity,
summary: String,
timestamp: Option<OffsetDateTime>,
top_config: TopConfig,
) -> AlertTrigger<T> {
let chain_id = top_config.app.chain_id;
let client_url = top_config.app.redirect_public_url.clone();
pagerduty_alert(
Some(chain_id),
class,
None,
client_url,
component,
custom_details,
severity,
None,
summary,
timestamp,
)
}
pub fn pagerduty_alert<T: Serialize>(
chain_id: Option<u64>,
class: Option<String>,
client: Option<String>,
client_url: Option<String>,
component: Option<String>,
custom_details: Option<T>,
severity: pagerduty_rs::types::Severity,
source: Option<String>,
summary: String,
timestamp: Option<OffsetDateTime>,
) -> AlertTrigger<T> {
let client = client.unwrap_or_else(|| "web3-proxy".to_string());
let group = chain_id.map(|x| format!("chain #{}", x));
let source =
source.unwrap_or_else(|| gethostname().into_string().unwrap_or("unknown".to_string()));
let mut s = DefaultHasher::new();
// TODO: include severity here?
summary.hash(&mut s);
client.hash(&mut s);
client_url.hash(&mut s);
component.hash(&mut s);
group.hash(&mut s);
class.hash(&mut s);
let dedup_key = s.finish().to_string();
let payload = AlertTriggerPayload {
severity,
summary,
source,
timestamp,
component,
group,
class,
custom_details,
};
AlertTrigger {
payload,
dedup_key: Some(dedup_key),
images: None,
links: None,
client: Some(client),
client_url,
}
}
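/*
Example wiring (a sketch, not part of this file): the cli entrypoint is assumed to build a
PagerdutySyncEventsV2 client from its config and install `panic_handler` as the global
panic hook. `top_config` here is the same Option<TopConfig> used above.

    let top_config = top_config.clone();
    std::panic::set_hook(Box::new(move |panic_info| {
        panic_handler(top_config.clone(), &pagerduty_sync, panic_info);
    }));
*/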

File diff suppressed because it is too large

@ -24,22 +24,22 @@ use std::sync::atomic::{self, AtomicU32, AtomicU64};
use std::{cmp::Ordering, sync::Arc};
use thread_fast_rng::rand::Rng;
use thread_fast_rng::thread_fast_rng;
use tokio::sync::{broadcast, oneshot, RwLock as AsyncRwLock};
use tokio::sync::{broadcast, oneshot, watch, RwLock as AsyncRwLock};
use tokio::time::{interval, sleep, sleep_until, timeout, Duration, Instant, MissedTickBehavior};
// TODO: maybe provider state should have the block data limit in it. but it is inside an async lock and we can't Serialize then
#[derive(Clone, Debug)]
pub enum ProviderState {
None,
NotReady(Arc<Web3Provider>),
Ready(Arc<Web3Provider>),
Connecting(Arc<Web3Provider>),
Connected(Arc<Web3Provider>),
}
impl ProviderState {
pub async fn provider(&self, allow_not_ready: bool) -> Option<&Arc<Web3Provider>> {
match self {
ProviderState::None => None,
ProviderState::NotReady(x) => {
ProviderState::Connecting(x) => {
if allow_not_ready {
Some(x)
} else {
@ -47,7 +47,7 @@ impl ProviderState {
None
}
}
ProviderState::Ready(x) => {
ProviderState::Connected(x) => {
if x.ready() {
Some(x)
} else {
@ -63,7 +63,6 @@ pub struct Web3Connection {
pub name: String,
pub display_name: Option<String>,
pub db_conn: Option<DatabaseConnection>,
pub(super) allowed_lag: u64,
/// TODO: can we get this from the provider? do we even need it?
pub(super) url: String,
/// Some connections use an http_client. we keep a clone for reconnecting
@ -77,6 +76,8 @@ pub struct Web3Connection {
/// provider is in a RwLock so that we can replace it if re-connecting
/// it is an async lock because we hold it open across awaits
pub(super) provider_state: AsyncRwLock<ProviderState>,
/// if a hard limit is hit, don't send more requests until this Instant (shared via a watch channel)
pub(super) hard_limit_until: Option<watch::Sender<Instant>>,
/// rate limits are stored in a central redis so that multiple proxies can share their rate limits
/// We do not use the deferred rate limiter because going over limits would cause errors
pub(super) hard_limit: Option<RedisRateLimiter>,
@ -84,6 +85,8 @@ pub struct Web3Connection {
pub(super) soft_limit: u32,
/// use web3 queries to find the block data limit for archive/pruned nodes
pub(super) automatic_block_limit: bool,
/// only use this rpc if everything else is lagging too far. this allows us to ignore fast but very low limit rpcs
pub(super) backup: bool,
/// TODO: have an enum for this so that "no limit" prints pretty?
pub(super) block_data_limit: AtomicU64,
/// Lower tiers are higher priority when sending requests
@ -99,7 +102,6 @@ impl Web3Connection {
#[allow(clippy::too_many_arguments)]
pub async fn spawn(
name: String,
allowed_lag: u64,
display_name: Option<String>,
chain_id: u64,
db_conn: Option<DatabaseConnection>,
@ -111,6 +113,7 @@ impl Web3Connection {
hard_limit: Option<(u64, RedisPool)>,
// TODO: think more about this type
soft_limit: u32,
backup: bool,
block_data_limit: Option<u64>,
block_map: BlockHashesCache,
block_sender: Option<flume::Sender<BlockAndRpc>>,
@ -135,9 +138,18 @@ impl Web3Connection {
let automatic_block_limit =
(block_data_limit.load(atomic::Ordering::Acquire) == 0) && block_sender.is_some();
// track hard_limit_until on backup servers (which might surprise us with rate limit changes)
// and on servers that have a configured hard limit
let hard_limit_until = if backup || hard_limit.is_some() {
let (sender, _) = watch::channel(Instant::now());
Some(sender)
} else {
None
};
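// note: request code borrows the Instant from hard_limit_until and returns RetryAt until it
// has passed; send_replace pushes it forward when the redis rate limiter or an upstream
// "rate limit" style error tells us to back off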
let new_connection = Self {
name,
allowed_lag,
db_conn: db_conn.clone(),
display_name,
http_client,
@ -147,8 +159,10 @@ impl Web3Connection {
internal_requests: 0.into(),
provider_state: AsyncRwLock::new(ProviderState::None),
hard_limit,
hard_limit_until,
soft_limit,
automatic_block_limit,
backup,
block_data_limit,
head_block: RwLock::new(Default::default()),
tier,
@ -191,25 +205,7 @@ impl Web3Connection {
return Ok(None);
}
// check if we are synced
let head_block: ArcBlock = self
.wait_for_request_handle(authorization, Duration::from_secs(30), true)
.await?
.request::<_, Option<_>>(
"eth_getBlockByNumber",
&json!(("latest", false)),
// errors here are expected, so keep the level low
Level::Warn.into(),
)
.await?
.context("no block during check_block_data_limit!")?;
if SavedBlock::from(head_block).syncing(60) {
// if the node is syncing, we can't check its block data limit
return Ok(None);
}
// TODO: add SavedBlock to self? probably best not to. we might not get marked Ready
// TODO: check eth_syncing. if it is not false, return Ok(None)
let mut limit = None;
@ -217,7 +213,7 @@ impl Web3Connection {
// TODO: start at 0 or 1?
for block_data_limit in [0, 32, 64, 128, 256, 512, 1024, 90_000, u64::MAX] {
let handle = self
.wait_for_request_handle(authorization, Duration::from_secs(30), true)
.wait_for_request_handle(authorization, None, true)
.await?;
let head_block_num_future = handle.request::<Option<()>, U256>(
@ -243,7 +239,7 @@ impl Web3Connection {
// TODO: wait for the handle BEFORE we check the current block number. it might be delayed too!
// TODO: what should the request be?
let handle = self
.wait_for_request_handle(authorization, Duration::from_secs(30), true)
.wait_for_request_handle(authorization, None, true)
.await?;
let archive_result: Result<Bytes, _> = handle
@ -292,26 +288,10 @@ impl Web3Connection {
self.block_data_limit.load(atomic::Ordering::Acquire).into()
}
pub fn syncing(&self, allowed_lag: u64) -> bool {
match self.head_block.read().clone() {
None => true,
Some(x) => x.syncing(allowed_lag),
}
}
pub fn has_block_data(&self, needed_block_num: &U64) -> bool {
let head_block_num = match self.head_block.read().clone() {
None => return false,
Some(x) => {
// TODO: this 60 second limit is causing our polygons to fall behind. change this to number of blocks?
if x.syncing(60) {
// skip syncing nodes. even though they might be able to serve a query,
// latency will be poor and it will get in the way of them syncing further
return false;
}
x.number()
}
Some(x) => x.number(),
};
// this rpc doesn't have that block yet. still syncing
@ -370,7 +350,15 @@ impl Web3Connection {
);
let retry_in = Duration::from_millis(sleep_ms);
info!(
let error_level = if self.backup {
log::Level::Debug
} else {
log::Level::Info
};
log::log!(
error_level,
"Failed reconnect to {}! Retry in {}ms. err={:?}",
self,
retry_in.as_millis(),
@ -401,7 +389,7 @@ impl Web3Connection {
ProviderState::None => {
info!("connecting to {}", self);
}
ProviderState::NotReady(provider) | ProviderState::Ready(provider) => {
ProviderState::Connecting(provider) | ProviderState::Connected(provider) => {
// disconnect the current provider
if let Web3Provider::Mock = provider.as_ref() {
return Ok(());
@ -435,7 +423,7 @@ impl Web3Connection {
let new_provider = Web3Provider::from_str(&self.url, self.http_client.clone()).await?;
// trace!("saving provider state as NotReady on {}", self);
*provider_state = ProviderState::NotReady(Arc::new(new_provider));
*provider_state = ProviderState::Connecting(Arc::new(new_provider));
// drop the lock so that we can get a request handle
// trace!("provider_state {} unlocked", self);
@ -448,7 +436,7 @@ impl Web3Connection {
// TODO: what should the timeout be? should there be a request timeout?
// trace!("waiting on chain id for {}", self);
let found_chain_id: Result<U64, _> = self
.wait_for_request_handle(&authorization, Duration::from_secs(30), true)
.wait_for_request_handle(&authorization, None, true)
.await?
.request(
"eth_chainId",
@ -489,7 +477,7 @@ impl Web3Connection {
.context("provider missing")?
.clone();
*provider_state = ProviderState::Ready(ready_provider);
*provider_state = ProviderState::Connected(ready_provider);
// trace!("unlocked for ready...");
}
@ -543,7 +531,7 @@ impl Web3Connection {
let _ = head_block.insert(new_head_block.clone().into());
}
if self.block_data_limit() == U64::zero() && !self.syncing(1) {
if self.block_data_limit() == U64::zero() {
let authorization = Arc::new(Authorization::internal(self.db_conn.clone())?);
if let Err(err) = self.check_block_data_limit(&authorization).await {
warn!(
@ -591,8 +579,6 @@ impl Web3Connection {
reconnect: bool,
tx_id_sender: Option<flume::Sender<(TxHash, Arc<Self>)>>,
) -> anyhow::Result<()> {
let allowed_lag = self.allowed_lag;
loop {
let http_interval_receiver = http_interval_sender.as_ref().map(|x| x.subscribe());
@ -624,8 +610,6 @@ impl Web3Connection {
let health_sleep_seconds = 10;
sleep(Duration::from_secs(health_sleep_seconds)).await;
let mut warned = 0;
loop {
// TODO: what if we just happened to have this check line up with another restart?
// TODO: think more about this
@ -644,34 +628,6 @@ impl Web3Connection {
}
// trace!("health check on {}. unlocked", conn);
if let Some(x) = &*conn.head_block.read() {
// if this block is too old, return an error so we reconnect
let current_lag = x.lag();
if current_lag > allowed_lag {
let level = if warned == 0 {
log::Level::Warn
} else if warned % 100 == 0 {
log::Level::Debug
} else {
log::Level::Trace
};
log::log!(
level,
"{} is lagged {} secs: {} {}",
conn,
current_lag,
x.number(),
x.hash(),
);
warned += 1;
} else {
// reset warnings now that we are connected
warned = 0;
}
}
sleep(Duration::from_secs(health_sleep_seconds)).await;
}
};
@ -750,7 +706,7 @@ impl Web3Connection {
// trace!("unlocked on new heads");
// TODO: need a timeout
if let ProviderState::Ready(provider) = provider_state {
if let ProviderState::Connected(provider) = provider_state {
match provider.as_ref() {
Web3Provider::Mock => unimplemented!(),
Web3Provider::Http(_provider) => {
@ -764,7 +720,7 @@ impl Web3Connection {
loop {
// TODO: what should the max_wait be?
match self
.wait_for_request_handle(&authorization, Duration::from_secs(30), false)
.wait_for_request_handle(&authorization, None, false)
.await
{
Ok(active_request_handle) => {
@ -850,7 +806,7 @@ impl Web3Connection {
Web3Provider::Ws(provider) => {
// todo: move subscribe_blocks onto the request handle?
let active_request_handle = self
.wait_for_request_handle(&authorization, Duration::from_secs(30), false)
.wait_for_request_handle(&authorization, None, false)
.await;
let mut stream = provider.subscribe_blocks().await?;
drop(active_request_handle);
@ -860,7 +816,7 @@ impl Web3Connection {
// all it does is print "new block" for the same block as current block
// TODO: how does this get wrapped in an arc? does ethers handle that?
let block: Result<Option<ArcBlock>, _> = self
.wait_for_request_handle(&authorization, Duration::from_secs(30), false)
.wait_for_request_handle(&authorization, None, false)
.await?
.request(
"eth_getBlockByNumber",
@ -922,7 +878,7 @@ impl Web3Connection {
authorization: Arc<Authorization>,
tx_id_sender: flume::Sender<(TxHash, Arc<Self>)>,
) -> anyhow::Result<()> {
if let ProviderState::Ready(provider) = self
if let ProviderState::Connected(provider) = self
.provider_state
.try_read()
.context("subscribe_pending_transactions")?
@ -961,8 +917,8 @@ impl Web3Connection {
Web3Provider::Ws(provider) => {
// TODO: maybe the subscribe_pending_txs function should be on the active_request_handle
let active_request_handle = self
.wait_for_request_handle(&authorization, Duration::from_secs(30), false)
.await;
.wait_for_request_handle(&authorization, None, false)
.await?;
let mut stream = provider.subscribe_pending_txs().await?;
@ -995,13 +951,14 @@ impl Web3Connection {
/// be careful with this; it might wait forever!
/// `allow_not_ready` is only for use by health checks while starting the provider
/// TODO: don't use anyhow. use specific error type
pub async fn wait_for_request_handle(
self: &Arc<Self>,
authorization: &Arc<Authorization>,
max_wait: Duration,
max_wait: Option<Duration>,
allow_not_ready: bool,
) -> anyhow::Result<OpenRequestHandle> {
let max_wait = Instant::now() + max_wait;
let max_wait = max_wait.map(|x| Instant::now() + x);
loop {
match self
@ -1011,21 +968,39 @@ impl Web3Connection {
Ok(OpenRequestResult::Handle(handle)) => return Ok(handle),
Ok(OpenRequestResult::RetryAt(retry_at)) => {
// TODO: emit a stat?
// // trace!(?retry_at);
let wait = retry_at.duration_since(Instant::now());
if retry_at > max_wait {
// break now since we will wait past our maximum wait time
// TODO: don't use anyhow. use specific error type
return Err(anyhow::anyhow!("timeout waiting for request handle"));
trace!(
"waiting {} millis for request handle on {}",
wait.as_millis(),
self
);
if let Some(max_wait) = max_wait {
if retry_at > max_wait {
// break now since we will wait past our maximum wait time
// TODO: don't use anyhow. use specific error type
return Err(anyhow::anyhow!("timeout waiting for request handle"));
}
}
sleep_until(retry_at).await;
}
Ok(OpenRequestResult::NotReady) => {
Ok(OpenRequestResult::NotReady(_)) => {
// TODO: when can this happen? log? emit a stat?
// TODO: subscribe to the head block on this
trace!("{} has no handle ready", self);
if let Some(max_wait) = max_wait {
let now = Instant::now();
if now > max_wait {
return Err(anyhow::anyhow!("unable to retry for request handle"));
}
}
// TODO: sleep how long? maybe just error?
// TODO: don't use anyhow. use specific error type
return Err(anyhow::anyhow!("unable to retry for request handle"));
// TODO: instead of an arbitrary sleep, subscribe to the head block on this
sleep(Duration::from_millis(10)).await;
}
Err(err) => return Err(err),
}
@ -1048,27 +1023,50 @@ impl Web3Connection {
.await
.is_none()
{
return Ok(OpenRequestResult::NotReady);
trace!("{} is not ready", self);
return Ok(OpenRequestResult::NotReady(self.backup));
}
if let Some(hard_limit_until) = self.hard_limit_until.as_ref() {
let hard_limit_ready = hard_limit_until.borrow().clone();
let now = Instant::now();
if now < hard_limit_ready {
return Ok(OpenRequestResult::RetryAt(hard_limit_ready));
}
}
// check rate limits
if let Some(ratelimiter) = self.hard_limit.as_ref() {
// TODO: how should we know if we should set expire or not?
match ratelimiter.throttle().await? {
match ratelimiter
.throttle()
.await
.context(format!("attempting to throttle {}", self))?
{
RedisRateLimitResult::Allowed(_) => {
// // trace!("rate limit succeeded")
// trace!("rate limit succeeded")
}
RedisRateLimitResult::RetryAt(retry_at, _) => {
// rate limit failed
// save the smallest retry_after. if nothing succeeds, return an Err with retry_after in it
// TODO: use tracing better
// TODO: i'm seeing "Exhausted rate limit on moralis: 0ns". How is it getting 0?
warn!("Exhausted rate limit on {}. Retry at {:?}", self, retry_at);
// rate limit gave us a wait time
if !self.backup {
let when = retry_at.duration_since(Instant::now());
warn!(
"Exhausted rate limit on {}. Retry in {}ms",
self,
when.as_millis()
);
}
if let Some(hard_limit_until) = self.hard_limit_until.as_ref() {
hard_limit_until.send_replace(retry_at.clone());
}
return Ok(OpenRequestResult::RetryAt(retry_at));
}
RedisRateLimitResult::RetryNever => {
return Ok(OpenRequestResult::NotReady);
return Ok(OpenRequestResult::NotReady(self.backup));
}
}
};
@ -1213,7 +1211,6 @@ mod tests {
let x = Web3Connection {
name: "name".to_string(),
allowed_lag: 10,
db_conn: None,
display_name: None,
url: "ws://example.com".to_string(),
@ -1223,8 +1220,10 @@ mod tests {
internal_requests: 0.into(),
provider_state: AsyncRwLock::new(ProviderState::None),
hard_limit: None,
hard_limit_until: None,
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: block_data_limit.into(),
tier: 0,
head_block: RwLock::new(Some(head_block.clone())),
@ -1261,7 +1260,6 @@ mod tests {
// TODO: this is getting long. have a `impl Default`
let x = Web3Connection {
name: "name".to_string(),
allowed_lag: 10,
db_conn: None,
display_name: None,
url: "ws://example.com".to_string(),
@ -1271,8 +1269,10 @@ mod tests {
internal_requests: 0.into(),
provider_state: AsyncRwLock::new(ProviderState::None),
hard_limit: None,
hard_limit_until: None,
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: block_data_limit.into(),
tier: 0,
head_block: RwLock::new(Some(head_block.clone())),
@ -1288,6 +1288,8 @@ mod tests {
assert!(!x.has_block_data(&(head_block.number() + 1000)));
}
/*
// TODO: think about how to bring the concept of a "lagged" node back
#[test]
fn test_lagged_node_not_has_block_data() {
let now: U256 = SystemTime::now()
@ -1313,7 +1315,6 @@ mod tests {
let x = Web3Connection {
name: "name".to_string(),
allowed_lag: 10,
db_conn: None,
display_name: None,
url: "ws://example.com".to_string(),
@ -1325,6 +1326,7 @@ mod tests {
hard_limit: None,
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: block_data_limit.into(),
tier: 0,
head_block: RwLock::new(Some(head_block.clone())),
@ -1337,4 +1339,5 @@ mod tests {
assert!(!x.has_block_data(&(head_block.number() + 1)));
assert!(!x.has_block_data(&(head_block.number() + 1000)));
}
*/
}

File diff suppressed because it is too large

@ -27,7 +27,8 @@ pub enum OpenRequestResult {
/// Unable to start a request. Retry at the given time.
RetryAt(Instant),
/// Unable to start a request because the server is not synced
NotReady,
/// contains "true" if backup servers were attempted
NotReady(bool),
}
/// Make RPC requests through this handle and drop it when you are done.
@ -42,7 +43,7 @@ pub struct OpenRequestHandle {
}
/// Depending on the context, RPC errors can require different handling.
pub enum RequestErrorHandler {
pub enum RequestRevertHandler {
/// Log at the trace level. Use when errors are expected.
TraceLevel,
/// Log at the debug level. Use when errors are expected.
@ -52,7 +53,7 @@ pub enum RequestErrorHandler {
/// Log at the warn level. Use when errors do not cause problems.
WarnLevel,
/// Potentially save the revert. Users can tune how often this happens
SaveReverts,
Save,
}
// TODO: second param could be skipped since we don't need it here
@ -65,13 +66,13 @@ struct EthCallFirstParams {
data: Option<Bytes>,
}
impl From<Level> for RequestErrorHandler {
impl From<Level> for RequestRevertHandler {
fn from(level: Level) -> Self {
match level {
Level::Trace => RequestErrorHandler::TraceLevel,
Level::Debug => RequestErrorHandler::DebugLevel,
Level::Error => RequestErrorHandler::ErrorLevel,
Level::Warn => RequestErrorHandler::WarnLevel,
Level::Trace => RequestRevertHandler::TraceLevel,
Level::Debug => RequestRevertHandler::DebugLevel,
Level::Error => RequestRevertHandler::ErrorLevel,
Level::Warn => RequestRevertHandler::WarnLevel,
_ => unimplemented!("unexpected tracing Level"),
}
}
@ -84,7 +85,7 @@ impl Authorization {
method: Method,
params: EthCallFirstParams,
) -> anyhow::Result<()> {
let rpc_key_id = match self.checks.rpc_key_id {
let rpc_key_id = match self.checks.rpc_secret_key_id {
Some(rpc_key_id) => rpc_key_id.into(),
None => {
// // trace!(?self, "cannot save revert without rpc_key_id");
@ -213,7 +214,7 @@ impl OpenRequestHandle {
&self,
method: &str,
params: &P,
error_handler: RequestErrorHandler,
revert_handler: RequestRevertHandler,
) -> Result<R, ProviderError>
where
// TODO: not sure about this type. would be better to not need clones, but measure and spawns combine to need it
@ -240,52 +241,58 @@ impl OpenRequestHandle {
Web3Provider::Ws(provider) => provider.request(method, params).await,
};
// TODO: i think ethers already has trace logging (and does it much more fancy)
trace!(
"response from {} for {} {:?}: {:?}",
self.conn,
method,
params,
response,
);
// // TODO: i think ethers already has trace logging (and does it much more fancy)
// trace!(
// "response from {} for {} {:?}: {:?}",
// self.conn,
// method,
// params,
// response,
// );
if let Err(err) = &response {
// only save reverts for some types of calls
// TODO: do something special for eth_sendRawTransaction too
let error_handler = if let RequestErrorHandler::SaveReverts = error_handler {
let revert_handler = if let RequestRevertHandler::Save = revert_handler {
// TODO: should all these be Trace or Debug or a mix?
if !["eth_call", "eth_estimateGas"].contains(&method) {
// trace!(%method, "skipping save on revert");
RequestErrorHandler::TraceLevel
RequestRevertHandler::TraceLevel
} else if self.authorization.db_conn.is_some() {
let log_revert_chance = self.authorization.checks.log_revert_chance;
if log_revert_chance == 0.0 {
// trace!(%method, "no chance. skipping save on revert");
RequestErrorHandler::TraceLevel
RequestRevertHandler::TraceLevel
} else if log_revert_chance == 1.0 {
// trace!(%method, "gaurenteed chance. SAVING on revert");
error_handler
revert_handler
} else if thread_fast_rng::thread_fast_rng().gen_range(0.0f64..=1.0)
< log_revert_chance
{
// trace!(%method, "missed chance. skipping save on revert");
RequestErrorHandler::TraceLevel
RequestRevertHandler::TraceLevel
} else {
// trace!("Saving on revert");
// TODO: is always logging at debug level fine?
error_handler
revert_handler
}
} else {
// trace!(%method, "no database. skipping save on revert");
RequestErrorHandler::TraceLevel
RequestRevertHandler::TraceLevel
}
} else {
error_handler
revert_handler
};
enum ResponseTypes {
Revert,
RateLimit,
Ok,
}
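// note: the error body is classified below so reverts can (optionally) be saved to the
// database and rate-limit errors can push this connection's hard_limit_until forward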
// check for "execution reverted" here
let is_revert = if let ProviderError::JsonRpcClientError(err) = err {
let response_type = if let ProviderError::JsonRpcClientError(err) = err {
// Http and Ws errors are very similar, but different types
let msg = match &*self.provider {
Web3Provider::Mock => unimplemented!(),
@ -310,30 +317,44 @@ impl OpenRequestHandle {
};
if let Some(msg) = msg {
msg.starts_with("execution reverted")
if msg.starts_with("execution reverted") {
trace!("revert from {}", self.conn);
ResponseTypes::Revert
} else if msg.contains("limit") || msg.contains("request") {
trace!("rate limit from {}", self.conn);
ResponseTypes::RateLimit
} else {
ResponseTypes::Ok
}
} else {
false
ResponseTypes::Ok
}
} else {
false
ResponseTypes::Ok
};
if is_revert {
trace!("revert from {}", self.conn);
if matches!(response_type, ResponseTypes::RateLimit) {
if let Some(hard_limit_until) = self.conn.hard_limit_until.as_ref() {
let retry_at = Instant::now() + Duration::from_secs(1);
trace!("retry {} at: {:?}", self.conn, retry_at);
hard_limit_until.send_replace(retry_at);
}
}
// TODO: think more about the method and param logs. those can be sensitive information
match error_handler {
RequestErrorHandler::DebugLevel => {
match revert_handler {
RequestRevertHandler::DebugLevel => {
// TODO: think about this revert check more. sometimes we might want reverts logged so this needs a flag
if !is_revert {
if matches!(response_type, ResponseTypes::Revert) {
debug!(
"bad response from {}! method={} params={:?} err={:?}",
self.conn, method, params, err
);
}
}
RequestErrorHandler::TraceLevel => {
RequestRevertHandler::TraceLevel => {
trace!(
"bad response from {}! method={} params={:?} err={:?}",
self.conn,
@ -342,21 +363,21 @@ impl OpenRequestHandle {
err
);
}
RequestErrorHandler::ErrorLevel => {
RequestRevertHandler::ErrorLevel => {
// TODO: include params if not running in release mode
error!(
"bad response from {}! method={} err={:?}",
self.conn, method, err
);
}
RequestErrorHandler::WarnLevel => {
RequestRevertHandler::WarnLevel => {
// TODO: include params if not running in release mode
warn!(
"bad response from {}! method={} err={:?}",
self.conn, method, err
);
}
RequestErrorHandler::SaveReverts => {
RequestRevertHandler::Save => {
trace!(
"bad response from {}! method={} params={:?} err={:?}",
self.conn,

@ -1,4 +1,4 @@
use super::blockchain::SavedBlock;
use super::blockchain::{ArcBlock, SavedBlock};
use super::connection::Web3Connection;
use super::connections::Web3Connections;
use ethers::prelude::{H256, U64};
@ -9,19 +9,33 @@ use std::sync::Arc;
/// A collection of Web3Connections that are on the same block.
/// Serialize is so we can print it on our debug endpoint
#[derive(Clone, Default, Serialize)]
pub struct SyncedConnections {
pub struct ConsensusConnections {
// TODO: store ArcBlock instead?
pub(super) head_block: Option<SavedBlock>,
// TODO: this should be able to serialize, but it isn't
#[serde(skip_serializing)]
pub(super) conns: Vec<Arc<Web3Connection>>,
pub(super) num_checked_conns: usize,
pub(super) includes_backups: bool,
}
impl fmt::Debug for SyncedConnections {
impl ConsensusConnections {
pub fn num_conns(&self) -> usize {
self.conns.len()
}
pub fn sum_soft_limit(&self) -> u32 {
self.conns.iter().fold(0, |sum, rpc| sum + rpc.soft_limit)
}
// TODO: sum_hard_limit?
}
impl fmt::Debug for ConsensusConnections {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// TODO: the default formatter takes forever to write. this is too quiet though
// TODO: print the actual conns?
f.debug_struct("SyncedConnections")
f.debug_struct("ConsensusConnections")
.field("head_block", &self.head_block)
.field("num_conns", &self.conns.len())
.finish_non_exhaustive()
@ -29,31 +43,29 @@ impl fmt::Debug for SyncedConnections {
}
impl Web3Connections {
pub fn head_block(&self) -> Option<SavedBlock> {
self.synced_connections.load().head_block.clone()
pub fn head_block(&self) -> Option<ArcBlock> {
self.watch_consensus_head_receiver
.as_ref()
.map(|x| x.borrow().clone())
}
pub fn head_block_hash(&self) -> Option<H256> {
self.synced_connections
.load()
.head_block
.as_ref()
.map(|head_block| head_block.hash())
self.head_block().and_then(|x| x.hash)
}
pub fn head_block_num(&self) -> Option<U64> {
self.synced_connections
.load()
.head_block
.as_ref()
.map(|head_block| head_block.number())
self.head_block().and_then(|x| x.number)
}
pub fn synced(&self) -> bool {
!self.synced_connections.load().conns.is_empty()
!self
.watch_consensus_connections_sender
.borrow()
.conns
.is_empty()
}
pub fn num_synced_rpcs(&self) -> usize {
self.synced_connections.load().conns.len()
self.watch_consensus_connections_sender.borrow().conns.len()
}
}