web3-proxy/web3_proxy/src/rpcs/one.rs
2023-07-10 11:05:07 -07:00

1402 lines
49 KiB
Rust

//! Rate-limited communication with a web3 provider.
use super::blockchain::{ArcBlock, BlocksByHashCache, Web3ProxyBlock};
use super::provider::{connect_http, connect_ws, EthersHttpProvider, EthersWsProvider};
use super::request::{OpenRequestHandle, OpenRequestResult};
use crate::app::{flatten_handle, Web3ProxyJoinHandle};
use crate::config::{BlockAndRpc, Web3RpcConfig};
use crate::errors::{Web3ProxyError, Web3ProxyErrorContext, Web3ProxyResult};
use crate::frontend::authorization::Authorization;
use crate::jsonrpc::{JsonRpcParams, JsonRpcResultData};
use crate::rpcs::request::RequestErrorHandler;
use anyhow::{anyhow, Context};
use arc_swap::ArcSwapOption;
use ethers::prelude::{Bytes, Middleware, TxHash, U64};
use ethers::types::{Address, Transaction, U256};
use futures::future::try_join_all;
use futures::StreamExt;
use latency::{EwmaLatency, PeakEwmaLatency, RollingQuantileLatency};
use migration::sea_orm::DatabaseConnection;
use nanorand::Rng;
use redis_rate_limiter::{RedisPool, RedisRateLimitResult, RedisRateLimiter};
use serde::ser::{SerializeStruct, Serializer};
use serde::Serialize;
use serde_json::json;
use std::cmp::Reverse;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::sync::atomic::{self, AtomicU32, AtomicU64, AtomicUsize};
use std::{cmp::Ordering, sync::Arc};
use tokio::sync::{watch, RwLock as AsyncRwLock};
use tokio::time::{interval, sleep, sleep_until, Duration, Instant, MissedTickBehavior};
use tracing::{debug, info, trace, warn, Level};
use url::Url;
/// An active connection to a Web3 RPC server like geth or erigon.
#[derive(Default)]
pub struct Web3Rpc {
pub name: String,
pub block_interval: Duration,
pub display_name: Option<String>,
pub db_conn: Option<DatabaseConnection>,
/// most all requests prefer use the http_provider
pub(super) http_provider: Option<EthersHttpProvider>,
/// the websocket url is only used for subscriptions
pub(super) ws_url: Option<Url>,
/// the websocket provider is only used for subscriptions
pub(super) ws_provider: ArcSwapOption<EthersWsProvider>,
/// keep track of hard limits
/// hard_limit_until is only inside an Option so that the "Default" derive works. it will always be set.
pub(super) hard_limit_until: Option<watch::Sender<Instant>>,
/// rate limits are stored in a central redis so that multiple proxies can share their rate limits
/// We do not use the deferred rate limiter because going over limits would cause errors
pub(super) hard_limit: Option<RedisRateLimiter>,
/// used for ensuring enough requests are available before advancing the head block
pub(super) soft_limit: u32,
/// use web3 queries to find the block data limit for archive/pruned nodes
pub(super) automatic_block_limit: bool,
/// only use this rpc if everything else is lagging too far. this allows us to ignore fast but very low limit rpcs
pub backup: bool,
/// TODO: have an enum for this so that "no limit" prints pretty?
pub(super) block_data_limit: AtomicU64,
/// head_block is only inside an Option so that the "Default" derive works. it will always be set.
pub(super) head_block: Option<watch::Sender<Option<Web3ProxyBlock>>>,
/// Track head block latency.
pub(super) head_delay: AsyncRwLock<EwmaLatency>,
/// Track peak request latency
/// peak_latency is only inside an Option so that the "Default" derive works. it will always be set.
pub(super) peak_latency: Option<PeakEwmaLatency>,
/// Automatically set priority
pub(super) tier: AtomicU32,
/// Track total internal requests served
pub(super) internal_requests: AtomicUsize,
/// Track total external requests served
pub(super) external_requests: AtomicUsize,
/// Track time used by external requests served
/// request_ms_histogram is only inside an Option so that the "Default" derive works. it will always be set.
pub(super) median_latency: Option<RollingQuantileLatency>,
/// Track in-flight requests
pub(super) active_requests: AtomicUsize,
/// disconnect_watch is only inside an Option so that the "Default" derive works. it will always be set.
pub(super) disconnect_watch: Option<watch::Sender<bool>>,
/// created_at is only inside an Option so that the "Default" derive works. it will always be set.
pub(super) created_at: Option<Instant>,
}
impl Web3Rpc {
/// Connect to a web3 rpc
// TODO: have this take a builder (which will have channels attached). or maybe just take the config and give the config public fields
#[allow(clippy::too_many_arguments)]
pub async fn spawn(
config: Web3RpcConfig,
name: String,
chain_id: u64,
db_conn: Option<DatabaseConnection>,
// optional because this is only used for http providers. websocket providers don't use it
http_client: Option<reqwest::Client>,
redis_pool: Option<RedisPool>,
block_interval: Duration,
block_map: BlocksByHashCache,
block_and_rpc_sender: Option<flume::Sender<BlockAndRpc>>,
tx_id_sender: Option<flume::Sender<(TxHash, Arc<Self>)>>,
) -> anyhow::Result<(Arc<Web3Rpc>, Web3ProxyJoinHandle<()>)> {
let created_at = Instant::now();
let hard_limit = match (config.hard_limit, redis_pool) {
(None, None) => None,
(Some(hard_limit), Some(redis_pool)) => {
// TODO: in process rate limiter instead? or is deffered good enough?
let rrl = RedisRateLimiter::new(
"web3_proxy",
&format!("{}:{}", chain_id, name),
hard_limit,
60.0,
redis_pool,
);
Some(rrl)
}
(None, Some(_)) => None,
(Some(_hard_limit), None) => {
return Err(anyhow::anyhow!(
"no redis client pool! needed for hard limit"
))
}
};
let tx_id_sender = if config.subscribe_txs {
tx_id_sender
} else {
None
};
let backup = config.backup;
let block_data_limit: AtomicU64 = config.block_data_limit.unwrap_or_default().into();
let automatic_block_limit = (block_data_limit.load(atomic::Ordering::Acquire) == 0)
&& block_and_rpc_sender.is_some();
// have a sender for tracking hard limit anywhere. we use this in case we
// and track on servers that have a configured hard limit
let (hard_limit_until, _) = watch::channel(Instant::now());
if config.ws_url.is_none() && config.http_url.is_none() {
return Err(anyhow!(
"either ws_url or http_url are required. it is best to set both. they must both point to the same server!"
));
}
let (head_block, _) = watch::channel(None);
// Spawn the task for calculting average peak latency
// TODO Should these defaults be in config
let peak_latency = PeakEwmaLatency::spawn(
// Decay over 15s
Duration::from_secs(15),
// Peak requests so far around 5k, we will use an order of magnitude
// more to be safe. Should only use about 50mb RAM
50_000,
// Start latency at 1 second
Duration::from_secs(1),
);
let median_request_latency = RollingQuantileLatency::spawn_median(1_000).await;
let http_provider = if let Some(http_url) = config.http_url {
let http_url = http_url.parse::<Url>()?;
Some(connect_http(http_url, http_client, block_interval)?)
// TODO: check the provider is on the right chain
} else {
None
};
let ws_url = if let Some(ws_url) = config.ws_url {
let ws_url = ws_url.parse::<Url>()?;
Some(ws_url)
} else {
None
};
let (disconnect_watch, _) = watch::channel(false);
let new_rpc = Self {
automatic_block_limit,
backup,
block_data_limit,
block_interval,
created_at: Some(created_at),
db_conn,
display_name: config.display_name,
hard_limit,
hard_limit_until: Some(hard_limit_until),
head_block: Some(head_block),
http_provider,
name,
peak_latency: Some(peak_latency),
median_latency: Some(median_request_latency),
soft_limit: config.soft_limit,
ws_url,
disconnect_watch: Some(disconnect_watch),
..Default::default()
};
let new_connection = Arc::new(new_rpc);
// subscribe to new blocks and new transactions
// subscribing starts the connection (with retries)
// TODO: make transaction subscription optional (just pass None for tx_id_sender)
let handle = {
let new_connection = new_connection.clone();
tokio::spawn(async move {
// TODO: this needs to be a subscribe_with_reconnect that does a retry with jitter and exponential backoff
new_connection
.subscribe_with_reconnect(
block_map,
block_and_rpc_sender,
chain_id,
tx_id_sender,
)
.await
})
};
Ok((new_connection, handle))
}
/// sort by...
/// - backups last
/// - tier (ascending)
/// - block number (descending)
/// TODO: tests on this!
/// TODO: should tier or block number take priority?
/// TODO: should this return a struct that implements sorting traits?
/// TODO: move this to consensus.rs
fn sort_on(&self, max_block: Option<U64>) -> (bool, Reverse<U64>, u32) {
let mut head_block = self
.head_block
.as_ref()
.and_then(|x| x.borrow().as_ref().map(|x| *x.number()))
.unwrap_or_default();
if let Some(max_block) = max_block {
head_block = head_block.min(max_block);
}
let tier = self.tier.load(atomic::Ordering::Relaxed);
let backup = self.backup;
(!backup, Reverse(head_block), tier)
}
/// TODO: move this to consensus.rs
pub fn sort_for_load_balancing_on(
&self,
max_block: Option<U64>,
) -> ((bool, Reverse<U64>, u32), Duration) {
let sort_on = self.sort_on(max_block);
let weighted_peak_latency = self.weighted_peak_latency();
let x = (sort_on, weighted_peak_latency);
trace!("sort_for_load_balancing {}: {:?}", self, x);
x
}
/// like sort_for_load_balancing, but shuffles tiers randomly instead of sorting by weighted_peak_latency
/// TODO: move this to consensus.rs
pub fn shuffle_for_load_balancing_on(
&self,
max_block: Option<U64>,
) -> ((bool, Reverse<U64>, u32), u8) {
let sort_on = self.sort_on(max_block);
let mut rng = nanorand::tls_rng();
let r = rng.generate::<u8>();
(sort_on, r)
}
pub fn weighted_peak_latency(&self) -> Duration {
let peak_latency = if let Some(peak_latency) = self.peak_latency.as_ref() {
peak_latency.latency()
} else {
Duration::from_secs(1)
};
// TODO: what ordering?
let active_requests = self.active_requests.load(atomic::Ordering::Acquire) as f32 + 1.0;
peak_latency.mul_f32(active_requests)
}
// TODO: would be great if rpcs exposed this. see https://github.com/ledgerwatch/erigon/issues/6391
async fn check_block_data_limit(self: &Arc<Self>) -> anyhow::Result<Option<u64>> {
if !self.automatic_block_limit {
// TODO: is this a good thing to return?
return Ok(None);
}
// TODO: check eth_syncing. if it is not false, return Ok(None)
let mut limit = None;
// TODO: binary search between 90k and max?
// TODO: start at 0 or 1?
for block_data_limit in [0, 32, 64, 128, 256, 512, 1024, 90_000, u64::MAX] {
let head_block_num = self
.internal_request::<_, U256>(
"eth_blockNumber",
&[(); 0],
// error here are expected, so keep the level low
Some(Level::DEBUG.into()),
Some(2),
Some(Duration::from_secs(5)),
)
.await
.context("head_block_num error during check_block_data_limit")?;
let maybe_archive_block = head_block_num.saturating_sub((block_data_limit).into());
trace!(
"checking maybe_archive_block on {}: {}",
self,
maybe_archive_block
);
// TODO: wait for the handle BEFORE we check the current block number. it might be delayed too!
// TODO: what should the request be?
let archive_result: Result<Bytes, _> = self
.internal_request(
"eth_getCode",
&json!((
"0xdead00000000000000000000000000000000beef",
maybe_archive_block,
)),
// error here are expected, so keep the level low
Some(Level::TRACE.into()),
Some(2),
Some(Duration::from_secs(5)),
)
.await;
trace!(
"archive_result on {} for {} ({}): {:?}",
self,
block_data_limit,
maybe_archive_block,
archive_result
);
if archive_result.is_err() {
break;
}
limit = Some(block_data_limit);
}
if let Some(limit) = limit {
if limit == 0 {
warn!("{} is unable to serve requests", self);
}
self.block_data_limit
.store(limit, atomic::Ordering::Release);
}
if limit == Some(u64::MAX) {
info!("block data limit on {}: archive", self);
} else {
info!("block data limit on {}: {:?}", self, limit);
}
Ok(limit)
}
/// TODO: this might be too simple. different nodes can prune differently. its possible we will have a block range
pub fn block_data_limit(&self) -> U64 {
self.block_data_limit.load(atomic::Ordering::Acquire).into()
}
/// TODO: get rid of this now that consensus rpcs does it
pub fn has_block_data(&self, needed_block_num: &U64) -> bool {
let head_block_num = match self.head_block.as_ref().unwrap().borrow().as_ref() {
None => return false,
Some(x) => *x.number(),
};
// this rpc doesn't have that block yet. still syncing
if needed_block_num > &head_block_num {
trace!(
"{} has head {} but needs {}",
self,
head_block_num,
needed_block_num,
);
return false;
}
// if this is a pruning node, we might not actually have the block
let block_data_limit: U64 = self.block_data_limit();
let oldest_block_num = head_block_num.saturating_sub(block_data_limit);
if needed_block_num < &oldest_block_num {
trace!(
"{} needs {} but the oldest available is {}",
self,
needed_block_num,
oldest_block_num
);
return false;
}
true
}
/// query the web3 provider to confirm it is on the expected chain with the expected data available
/// TODO: this currently checks only the http if both http and ws are set. it should check both and make sure they match
async fn check_provider(self: &Arc<Self>, chain_id: u64) -> Web3ProxyResult<()> {
// check the server's chain_id here
// TODO: some public rpcs (on bsc and fantom) do not return an id and so this ends up being an error
// TODO: what should the timeout be? should there be a request timeout?
// trace!("waiting on chain id for {}", self);
let found_chain_id: U64 = self
.internal_request(
"eth_chainId",
&[(); 0],
Some(Level::TRACE.into()),
Some(2),
Some(Duration::from_secs(5)),
)
.await?;
trace!("found_chain_id: {:#?}", found_chain_id);
if chain_id != found_chain_id.as_u64() {
return Err(anyhow::anyhow!(
"incorrect chain id! Config has {}, but RPC has {}",
chain_id,
found_chain_id
)
.context(format!("failed @ {}", self))
.into());
}
// TODO: only do this for balanced_rpcs. this errors on 4337 rpcs
self.check_block_data_limit()
.await
.context(format!("unable to check_block_data_limit of {}", self))?;
info!("successfully connected to {}", self);
Ok(())
}
pub(crate) async fn send_head_block_result(
self: &Arc<Self>,
new_head_block: Web3ProxyResult<Option<ArcBlock>>,
block_and_rpc_sender: &flume::Sender<BlockAndRpc>,
block_map: &BlocksByHashCache,
) -> Web3ProxyResult<()> {
let head_block_sender = self.head_block.as_ref().unwrap();
let new_head_block = match new_head_block {
Ok(x) => {
let x = x.and_then(Web3ProxyBlock::try_new);
match x {
None => {
if head_block_sender.borrow().is_none() {
// we previously sent a None. return early
return Ok(());
}
let age = self.created_at.unwrap().elapsed().as_millis();
debug!("clearing head block on {} ({}ms old)!", self, age);
// send an empty block to take this server out of rotation
head_block_sender.send_replace(None);
// TODO: clear self.block_data_limit?
None
}
Some(new_head_block) => {
let new_hash = *new_head_block.hash();
// if we already have this block saved, set new_head_block to that arc. otherwise store this copy
let new_head_block = block_map
.get_with_by_ref(&new_hash, async move { new_head_block })
.await;
// we are synced! yey!
head_block_sender.send_replace(Some(new_head_block.clone()));
if self.block_data_limit() == U64::zero() {
if let Err(err) = self.check_block_data_limit().await {
warn!(
"failed checking block limit after {} finished syncing. {:?}",
self, err
);
}
}
Some(new_head_block)
}
}
}
Err(err) => {
warn!(?err, "unable to get block from {}", self);
// send an empty block to take this server out of rotation
head_block_sender.send_replace(None);
// TODO: clear self.block_data_limit?
None
}
};
// tell web3rpcs about this rpc having this block
block_and_rpc_sender
.send_async((new_head_block, self.clone()))
.await
.context("block_and_rpc_sender failed sending")?;
Ok(())
}
fn should_disconnect(&self) -> bool {
*self.disconnect_watch.as_ref().unwrap().borrow()
}
async fn healthcheck(
self: &Arc<Self>,
error_handler: Option<RequestErrorHandler>,
) -> Web3ProxyResult<()> {
let head_block = self.head_block.as_ref().unwrap().borrow().clone();
if let Some(head_block) = head_block {
let head_block = head_block.block;
// TODO: if head block is very old and not expected to be syncing, emit warning
let block_number = head_block.number.context("no block number")?;
let to = if let Some(txid) = head_block.transactions.last().cloned() {
let tx = self
.internal_request::<_, Option<Transaction>>(
"eth_getTransactionByHash",
&(txid,),
error_handler,
Some(2),
Some(Duration::from_secs(5)),
)
.await?
.context("no transaction")?;
// TODO: what default? something real?
tx.to.unwrap_or_else(|| {
"0xdead00000000000000000000000000000000beef"
.parse::<Address>()
.expect("deafbeef")
})
} else {
"0xdead00000000000000000000000000000000beef"
.parse::<Address>()
.expect("deafbeef")
};
let _code = self
.internal_request::<_, Option<Bytes>>(
"eth_getCode",
&(to, block_number),
error_handler,
Some(2),
Some(Duration::from_secs(5)),
)
.await?;
} else {
// TODO: if head block is none for too long, give an error
}
Ok(())
}
#[allow(clippy::too_many_arguments)]
async fn subscribe_with_reconnect(
self: Arc<Self>,
block_map: BlocksByHashCache,
block_and_rpc_sender: Option<flume::Sender<BlockAndRpc>>,
chain_id: u64,
tx_id_sender: Option<flume::Sender<(TxHash, Arc<Self>)>>,
) -> Web3ProxyResult<()> {
loop {
if let Err(err) = self
.clone()
.subscribe(
block_map.clone(),
block_and_rpc_sender.clone(),
chain_id,
tx_id_sender.clone(),
)
.await
{
if self.should_disconnect() {
break;
}
warn!(?err, "subscribe err on {}", self);
} else if self.should_disconnect() {
break;
}
if self.backup {
debug!("reconnecting to {} in 30 seconds", self);
} else {
info!("reconnecting to {} in 30 seconds", self);
}
// TODO: exponential backoff with jitter
sleep(Duration::from_secs(30)).await;
}
Ok(())
}
/// subscribe to blocks and transactions
/// This should only exit when the program is exiting.
/// TODO: should more of these args be on self? chain_id for sure
async fn subscribe(
self: Arc<Self>,
block_map: BlocksByHashCache,
block_and_rpc_sender: Option<flume::Sender<BlockAndRpc>>,
chain_id: u64,
tx_id_sender: Option<flume::Sender<(TxHash, Arc<Self>)>>,
) -> Web3ProxyResult<()> {
let error_handler = if self.backup {
Some(RequestErrorHandler::DebugLevel)
} else {
// TODO: info level?
Some(RequestErrorHandler::InfoLevel)
};
if self.should_disconnect() {
return Ok(());
}
if let Some(url) = self.ws_url.clone() {
trace!("starting websocket provider on {}", self);
let x = connect_ws(url, usize::MAX).await?;
let x = Arc::new(x);
self.ws_provider.store(Some(x));
}
if self.should_disconnect() {
return Ok(());
}
trace!("starting subscriptions on {}", self);
self.check_provider(chain_id)
.await
.web3_context("failed check_provider")?;
let mut futures = vec![];
// TODO: use this channel instead of self.disconnect_watch
let (subscribe_stop_tx, subscribe_stop_rx) = watch::channel(false);
// subscribe to the disconnect watch. the app uses this when shutting down or when configs change
if let Some(disconnect_watch_tx) = self.disconnect_watch.as_ref() {
let rpc = self.clone();
let mut disconnect_watch_rx = disconnect_watch_tx.subscribe();
let f = async move {
loop {
if *disconnect_watch_rx.borrow_and_update() {
break;
}
disconnect_watch_rx.changed().await?;
}
info!("disconnect triggered on {}", rpc);
Ok(())
};
futures.push(flatten_handle(tokio::spawn(f)));
}
// health check that runs if there haven't been any recent requests
{
// TODO: move this into a proper function
let rpc = self.clone();
// TODO: how often? different depending on the chain?
// TODO: reset this timeout when a new block is seen? we need to keep median_request_latency updated though
let health_sleep_seconds = 5;
// health check loop
let f = async move {
// TODO: benchmark this and lock contention
let mut old_total_requests = 0;
let mut new_total_requests;
// errors here should not cause the loop to exit!
while !(*subscribe_stop_rx.borrow()) {
new_total_requests = rpc.internal_requests.load(atomic::Ordering::Relaxed)
+ rpc.external_requests.load(atomic::Ordering::Relaxed);
if new_total_requests - old_total_requests < 5 {
// TODO: if this fails too many times, reset the connection
// TODO: move this into a function and the chaining should be easier
if let Err(err) = rpc.healthcheck(error_handler).await {
// TODO: different level depending on the error handler
// TODO: if rate limit error, set "retry_at"
warn!(?err, "health check on {} failed", rpc);
}
}
// TODO: should we count the requests done inside this health check
old_total_requests = new_total_requests;
sleep(Duration::from_secs(health_sleep_seconds)).await;
}
debug!("healthcheck loop on {} exited", rpc);
Ok(())
};
futures.push(flatten_handle(tokio::spawn(f)));
}
// subscribe to new heads
if let Some(block_and_rpc_sender) = block_and_rpc_sender.clone() {
let clone = self.clone();
let subscribe_stop_rx = subscribe_stop_tx.subscribe();
let f = async move {
let x = clone
.subscribe_new_heads(
block_and_rpc_sender.clone(),
block_map.clone(),
subscribe_stop_rx,
)
.await;
// error or success, we clear the block when subscribe_new_heads exits
clone
.send_head_block_result(Ok(None), &block_and_rpc_sender, &block_map)
.await?;
x
};
// TODO: if
futures.push(flatten_handle(tokio::spawn(f)));
}
// subscribe pending transactions
// TODO: make this opt-in. its a lot of bandwidth
if let Some(tx_id_sender) = tx_id_sender {
let subscribe_stop_rx = subscribe_stop_tx.subscribe();
let f = self
.clone()
.subscribe_pending_transactions(tx_id_sender, subscribe_stop_rx);
futures.push(flatten_handle(tokio::spawn(f)));
}
// try_join on the futures
if let Err(err) = try_join_all(futures).await {
warn!(?err, "subscription erred");
}
debug!("subscriptions on {} exited", self);
subscribe_stop_tx.send_replace(true);
// TODO: wait for all of the futures to exit?
// TODO: tell ethers to disconnect?
self.ws_provider.store(None);
Ok(())
}
/// Subscribe to new blocks.
async fn subscribe_new_heads(
self: &Arc<Self>,
block_sender: flume::Sender<BlockAndRpc>,
block_map: BlocksByHashCache,
subscribe_stop_rx: watch::Receiver<bool>,
) -> Web3ProxyResult<()> {
trace!("subscribing to new heads on {}", self);
// TODO: different handler depending on backup or not
let error_handler = None;
let authorization = Default::default();
if let Some(ws_provider) = self.ws_provider.load().as_ref() {
// todo: move subscribe_blocks onto the request handle
let active_request_handle = self
.wait_for_request_handle(&authorization, None, error_handler)
.await;
let mut blocks = ws_provider.subscribe_blocks().await?;
drop(active_request_handle);
// query the block once since the subscription doesn't send the current block
// there is a very small race condition here where the stream could send us a new block right now
// but all seeing the same block twice won't break anything
// TODO: how does this get wrapped in an arc? does ethers handle that?
// TODO: send this request to the ws_provider instead of the http_provider
let latest_block: Result<Option<ArcBlock>, _> = self
.authorized_request(
"eth_getBlockByNumber",
&("latest", false),
&authorization,
Some(Level::WARN.into()),
Some(2),
Some(Duration::from_secs(5)),
)
.await;
self.send_head_block_result(latest_block, &block_sender, &block_map)
.await?;
while let Some(block) = blocks.next().await {
if *subscribe_stop_rx.borrow() {
trace!("stopping ws block subscription on {}", self);
break;
}
let block = Arc::new(block);
self.send_head_block_result(Ok(Some(block)), &block_sender, &block_map)
.await?;
}
} else if self.http_provider.is_some() {
// there is a "watch_blocks" function, but a lot of public nodes (including llamanodes) do not support the necessary rpc endpoints
// TODO: is 1/2 the block time okay?
let mut i = interval(self.block_interval / 2);
i.set_missed_tick_behavior(MissedTickBehavior::Delay);
loop {
if *subscribe_stop_rx.borrow() {
trace!("stopping http block subscription on {}", self);
break;
}
let block_result = self
.authorized_request::<_, Option<ArcBlock>>(
"eth_getBlockByNumber",
&("latest", false),
&authorization,
Some(Level::WARN.into()),
Some(2),
Some(Duration::from_secs(5)),
)
.await;
self.send_head_block_result(block_result, &block_sender, &block_map)
.await?;
i.tick().await;
}
} else {
unimplemented!("no ws or http provider!")
}
// clear the head block. this might not be needed, but it won't hurt
self.send_head_block_result(Ok(None), &block_sender, &block_map)
.await?;
if *subscribe_stop_rx.borrow() {
debug!("new heads subscription exited");
Ok(())
} else {
Err(anyhow!("new_heads subscription exited. reconnect needed").into())
}
}
/// Turn on the firehose of pending transactions
async fn subscribe_pending_transactions(
self: Arc<Self>,
tx_id_sender: flume::Sender<(TxHash, Arc<Self>)>,
mut subscribe_stop_rx: watch::Receiver<bool>,
) -> Web3ProxyResult<()> {
// TODO: check that it actually changed to true
loop {
if *subscribe_stop_rx.borrow_and_update() {
break;
}
subscribe_stop_rx.changed().await?;
}
/*
trace!("watching pending transactions on {}", self);
// TODO: does this keep the lock open for too long?
match provider.as_ref() {
Web3Provider::Http(_provider) => {
// there is a "watch_pending_transactions" function, but a lot of public nodes do not support the necessary rpc endpoints
self.wait_for_disconnect().await?;
}
Web3Provider::Both(_, client) | Web3Provider::Ws(client) => {
// TODO: maybe the subscribe_pending_txs function should be on the active_request_handle
let active_request_handle = self
.wait_for_request_handle(&authorization, None, Some(provider.clone()))
.await?;
let mut stream = client.subscribe_pending_txs().await?;
drop(active_request_handle);
while let Some(pending_tx_id) = stream.next().await {
tx_id_sender
.send_async((pending_tx_id, self.clone()))
.await
.context("tx_id_sender")?;
// TODO: periodically check for listeners. if no one is subscribed, unsubscribe and wait for a subscription
// TODO: select on this instead of checking every loop
if self.should_disconnect() {
break;
}
}
// TODO: is this always an error?
// TODO: we probably don't want a warn and to return error
debug!("pending_transactions subscription ended on {}", self);
}
#[cfg(test)]
Web3Provider::Mock => {
self.wait_for_disconnect().await?;
}
}
*/
if *subscribe_stop_rx.borrow() {
Ok(())
} else {
Err(anyhow!("pending_transactions subscription exited. reconnect needed").into())
}
}
pub async fn wait_for_request_handle(
self: &Arc<Self>,
authorization: &Arc<Authorization>,
max_wait: Option<Duration>,
error_handler: Option<RequestErrorHandler>,
) -> Web3ProxyResult<OpenRequestHandle> {
// TODO: what should the default be?
// TODO: split max_wait_connect (which might wait if a rate limit is pending) and max_wait_request
let max_wait_until = max_wait.map(|x| Instant::now() + x);
loop {
match self.try_request_handle(authorization, error_handler).await {
Ok(OpenRequestResult::Handle(handle)) => return Ok(handle),
Ok(OpenRequestResult::RetryAt(retry_at)) => {
// TODO: emit a stat?
let wait = retry_at.duration_since(Instant::now());
trace!(
"waiting {} millis for request handle on {}",
wait.as_millis(),
self
);
if let Some(max_wait_until) = max_wait_until {
if retry_at > max_wait_until {
// break now since we will wait past our maximum wait time
return Err(Web3ProxyError::Timeout(None));
}
}
sleep_until(retry_at).await;
}
Ok(OpenRequestResult::NotReady) => {
// TODO: when can this happen? log? emit a stat?
trace!("{} has no handle ready", self);
if let Some(max_wait_until) = max_wait_until {
if Instant::now() > max_wait_until {
return Err(Web3ProxyError::NoHandleReady);
}
}
// TODO: sleep how long? maybe just error?
// TODO: instead of an arbitrary sleep, subscribe to the head block on this?
sleep(Duration::from_millis(10)).await;
}
Err(err) => return Err(err),
}
}
}
pub async fn try_request_handle(
self: &Arc<Self>,
authorization: &Arc<Authorization>,
error_handler: Option<RequestErrorHandler>,
) -> Web3ProxyResult<OpenRequestResult> {
// TODO: if websocket is reconnecting, return an error?
// check cached rate limits
if let Some(hard_limit_until) = self.hard_limit_until.as_ref() {
let hard_limit_ready = *hard_limit_until.borrow();
let now = Instant::now();
if now < hard_limit_ready {
return Ok(OpenRequestResult::RetryAt(hard_limit_ready));
}
}
// check shared rate limits
if let Some(ratelimiter) = self.hard_limit.as_ref() {
// TODO: how should we know if we should set expire or not?
match ratelimiter
.throttle()
.await
.context(format!("attempting to throttle {}", self))?
{
RedisRateLimitResult::Allowed(_) => {
// trace!("rate limit succeeded")
}
RedisRateLimitResult::RetryAt(retry_at, _) => {
// rate limit gave us a wait time
// if not a backup server, warn. backups hit rate limits often
if !self.backup {
let when = retry_at.duration_since(Instant::now());
warn!(
retry_ms=%when.as_millis(),
"Exhausted rate limit on {}",
self,
);
}
if let Some(hard_limit_until) = self.hard_limit_until.as_ref() {
hard_limit_until.send_replace(retry_at);
}
return Ok(OpenRequestResult::RetryAt(retry_at));
}
RedisRateLimitResult::RetryNever => {
warn!("how did retry never on {} happen?", self);
return Ok(OpenRequestResult::NotReady);
}
}
};
let handle =
OpenRequestHandle::new(authorization.clone(), self.clone(), error_handler).await;
Ok(handle.into())
}
pub async fn internal_request<P: JsonRpcParams, R: JsonRpcResultData>(
self: &Arc<Self>,
method: &str,
params: &P,
error_handler: Option<RequestErrorHandler>,
max_tries: Option<usize>,
max_wait: Option<Duration>,
) -> Web3ProxyResult<R> {
let authorization = Default::default();
self.authorized_request(
method,
params,
&authorization,
error_handler,
max_tries,
max_wait,
)
.await
}
pub async fn authorized_request<P: JsonRpcParams, R: JsonRpcResultData>(
self: &Arc<Self>,
method: &str,
params: &P,
authorization: &Arc<Authorization>,
error_handler: Option<RequestErrorHandler>,
max_tries: Option<usize>,
max_wait: Option<Duration>,
) -> Web3ProxyResult<R> {
// TODO: take max_wait as a function argument?
let mut tries = max_tries.unwrap_or(1);
let mut last_error: Option<Web3ProxyError> = None;
while tries > 0 {
tries -= 1;
let handle = match self
.wait_for_request_handle(authorization, max_wait, error_handler)
.await
{
Ok(x) => x,
Err(err) => {
last_error = Some(err);
continue;
}
};
match handle.request::<P, R>(method, params).await {
Ok(x) => return Ok(x),
Err(err) => {
last_error = Some(err.into());
continue;
}
}
}
if let Some(last_error) = last_error {
return Err(last_error);
}
Err(anyhow::anyhow!("authorized_request failed in an unexpected way").into())
}
}
impl Hash for Web3Rpc {
fn hash<H: Hasher>(&self, state: &mut H) {
// do not include automatic block limit because it can change
// do not include tier because it can change
self.backup.hash(state);
self.created_at.hash(state);
self.display_name.hash(state);
self.name.hash(state);
// TODO: url does NOT include the authorization data. i think created_at should protect us if auth changes without anything else
self.http_provider.as_ref().map(|x| x.url()).hash(state);
// TODO: figure out how to get the url for the ws provider
// self.ws_provider.map(|x| x.url()).hash(state);
// TODO: don't include soft_limit if we change them to be dynamic
self.soft_limit.hash(state);
}
}
impl Eq for Web3Rpc {}
impl Ord for Web3Rpc {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.name.cmp(&other.name)
}
}
impl PartialOrd for Web3Rpc {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl PartialEq for Web3Rpc {
fn eq(&self, other: &Self) -> bool {
self.name == other.name
}
}
impl Serialize for Web3Rpc {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
// 14 if we bring head_delay back
let mut state = serializer.serialize_struct("Web3Rpc", 13)?;
// the url is excluded because it likely includes private information. just show the name that we use in keys
state.serialize_field("name", &self.name)?;
// a longer name for display to users
state.serialize_field("display_name", &self.display_name)?;
state.serialize_field("backup", &self.backup)?;
match self.block_data_limit.load(atomic::Ordering::Acquire) {
u64::MAX => {
state.serialize_field("block_data_limit", &None::<()>)?;
}
block_data_limit => {
state.serialize_field("block_data_limit", &block_data_limit)?;
}
}
state.serialize_field("tier", &self.tier)?;
state.serialize_field("soft_limit", &self.soft_limit)?;
// TODO: maybe this is too much data. serialize less?
{
let head_block = self.head_block.as_ref().unwrap();
let head_block = head_block.borrow();
let head_block = head_block.as_ref();
state.serialize_field("head_block", &head_block)?;
}
state.serialize_field(
"external_requests",
&self.external_requests.load(atomic::Ordering::Relaxed),
)?;
state.serialize_field(
"internal_requests",
&self.internal_requests.load(atomic::Ordering::Relaxed),
)?;
state.serialize_field(
"active_requests",
&self.active_requests.load(atomic::Ordering::Relaxed),
)?;
// {
// let head_delay_ms = self.head_delay.read().await.latency().as_secs_f32() * 1000.0;
// state.serialize_field("head_delay_ms", &(head_delay_ms))?;
// }
{
let median_latency_ms = self
.median_latency
.as_ref()
.unwrap()
.latency()
.as_secs_f32()
* 1000.0;
state.serialize_field("median_latency_ms", &(median_latency_ms))?;
}
{
let peak_latency_ms =
self.peak_latency.as_ref().unwrap().latency().as_secs_f32() * 1000.0;
state.serialize_field("peak_latency_ms", &peak_latency_ms)?;
}
{
let weighted_latency_ms = self.weighted_peak_latency().as_secs_f32() * 1000.0;
state.serialize_field("weighted_latency_ms", &weighted_latency_ms)?;
}
state.end()
}
}
impl fmt::Debug for Web3Rpc {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut f = f.debug_struct("Web3Rpc");
f.field("name", &self.name);
let block_data_limit = self.block_data_limit.load(atomic::Ordering::Acquire);
if block_data_limit == u64::MAX {
f.field("blocks", &"all");
} else {
f.field("blocks", &block_data_limit);
}
f.finish_non_exhaustive()
}
}
impl fmt::Display for Web3Rpc {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", &self.name)
}
}
mod tests {
#![allow(unused_imports)]
use super::*;
use ethers::types::{Block, H256, U256};
#[test]
fn test_archive_node_has_block_data() {
let now = chrono::Utc::now().timestamp().into();
let random_block = Block {
hash: Some(H256::random()),
number: Some(1_000_000.into()),
timestamp: now,
..Default::default()
};
let random_block = Arc::new(random_block);
let head_block = Web3ProxyBlock::try_new(random_block).unwrap();
let block_data_limit = u64::MAX;
let (tx, _) = watch::channel(Some(head_block.clone()));
let x = Web3Rpc {
name: "name".to_string(),
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: block_data_limit.into(),
head_block: Some(tx),
..Default::default()
};
assert!(x.has_block_data(&0.into()));
assert!(x.has_block_data(&1.into()));
assert!(x.has_block_data(head_block.number()));
assert!(!x.has_block_data(&(head_block.number() + 1)));
assert!(!x.has_block_data(&(head_block.number() + 1000)));
}
#[test]
fn test_pruned_node_has_block_data() {
let now = chrono::Utc::now().timestamp().into();
let head_block: Web3ProxyBlock = Arc::new(Block {
hash: Some(H256::random()),
number: Some(1_000_000.into()),
timestamp: now,
..Default::default()
})
.try_into()
.unwrap();
let block_data_limit = 64;
let (tx, _rx) = watch::channel(Some(head_block.clone()));
let x = Web3Rpc {
name: "name".to_string(),
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: block_data_limit.into(),
head_block: Some(tx),
..Default::default()
};
assert!(!x.has_block_data(&0.into()));
assert!(!x.has_block_data(&1.into()));
assert!(!x.has_block_data(&(head_block.number() - block_data_limit - 1)));
assert!(x.has_block_data(&(head_block.number() - block_data_limit)));
assert!(x.has_block_data(head_block.number()));
assert!(!x.has_block_data(&(head_block.number() + 1)));
assert!(!x.has_block_data(&(head_block.number() + 1000)));
}
/*
// TODO: think about how to bring the concept of a "lagged" node back
#[test]
fn test_lagged_node_not_has_block_data() {
let now = chrono::Utc::now().timestamp().into();
// head block is an hour old
let head_block = Block {
hash: Some(H256::random()),
number: Some(1_000_000.into()),
timestamp: now - 3600,
..Default::default()
};
let head_block = Arc::new(head_block);
let head_block = Web3ProxyBlock::new(head_block);
let block_data_limit = u64::MAX;
let metrics = OpenRequestHandleMetrics::default();
let x = Web3Rpc {
name: "name".to_string(),
db_conn: None,
display_name: None,
url: "ws://example.com".to_string(),
http_client: None,
active_requests: 0.into(),
frontend_requests: 0.into(),
internal_requests: 0.into(),
provider_state: AsyncRwLock::new(ProviderState::None),
hard_limit: None,
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: block_data_limit.into(),
tier: 0,
head_block: AsyncRwLock::new(Some(head_block.clone())),
};
assert!(!x.has_block_data(&0.into()));
assert!(!x.has_block_data(&1.into()));
assert!(!x.has_block_data(&head_block.number()));
assert!(!x.has_block_data(&(head_block.number() + 1)));
assert!(!x.has_block_data(&(head_block.number() + 1000)));
}
*/
}