web3-proxy/web3_proxy/src/rpcs/many.rs
2023-07-20 14:32:52 -07:00

2040 lines
75 KiB
Rust

//! Load balanced communication with a group of web3 rpc providers
use super::blockchain::{BlocksByHashCache, BlocksByNumberCache, Web3ProxyBlock};
use super::consensus::{RankedRpcs, ShouldWaitForBlock};
use super::one::Web3Rpc;
use super::request::{OpenRequestHandle, OpenRequestResult, RequestErrorHandler};
use crate::app::{flatten_handle, Web3ProxyApp, Web3ProxyJoinHandle};
use crate::config::{average_block_interval, BlockAndRpc, Web3RpcConfig};
use crate::errors::{Web3ProxyError, Web3ProxyResult};
use crate::frontend::authorization::{Authorization, RequestMetadata};
use crate::frontend::rpc_proxy_ws::ProxyMode;
use crate::frontend::status::MokaCacheSerializer;
use crate::jsonrpc::{JsonRpcErrorData, JsonRpcParams, JsonRpcResultData};
use counter::Counter;
use derive_more::From;
use ethers::prelude::{ProviderError, U64};
use futures::future::try_join_all;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use hashbrown::HashMap;
use itertools::Itertools;
use moka::future::CacheBuilder;
use parking_lot::RwLock;
use serde::ser::{SerializeStruct, Serializer};
use serde::Serialize;
use serde_json::json;
use serde_json::value::RawValue;
use std::borrow::Cow;
use std::cmp::min_by_key;
use std::fmt::{self, Display};
use std::sync::atomic::Ordering;
use std::sync::Arc;
use tokio::select;
use tokio::sync::{mpsc, watch};
use tokio::time::{sleep, sleep_until, Duration, Instant};
use tracing::{debug, error, info, trace, warn};
/// A collection of web3 connections. Sends requests either the current best server or all servers.
#[derive(From)]
pub struct Web3Rpcs {
pub(crate) name: Cow<'static, str>,
pub(crate) chain_id: u64,
/// if watch_consensus_head_sender is some, Web3Rpc inside self will send blocks here when they get them
pub(crate) block_sender: mpsc::UnboundedSender<(Option<Web3ProxyBlock>, Arc<Web3Rpc>)>,
/// any requests will be forwarded to one (or more) of these connections
/// TODO: hopefully this not being an async lock will be okay. if you need it across awaits, clone the arc
pub(crate) by_name: RwLock<HashMap<String, Arc<Web3Rpc>>>,
/// all providers with the same consensus head block. won't update if there is no `self.watch_consensus_head_sender`
/// TODO: document that this is a watch sender and not a broadcast! if things get busy, blocks might get missed
/// TODO: why is watch_consensus_head_sender in an Option, but this one isn't?
/// Geth's subscriptions have the same potential for skipping blocks.
pub(crate) watch_ranked_rpcs: watch::Sender<Option<Arc<RankedRpcs>>>,
/// this head receiver makes it easy to wait until there is a new block
pub(super) watch_head_block: Option<watch::Sender<Option<Web3ProxyBlock>>>,
/// TODO: this map is going to grow forever unless we do some sort of pruning. maybe store pruned in redis?
/// all blocks, including orphans
pub(super) blocks_by_hash: BlocksByHashCache,
/// blocks on the heaviest chain
pub(super) blocks_by_number: BlocksByNumberCache,
/// the number of rpcs required to agree on consensus for the head block (thundering herd protection)
pub(super) min_synced_rpcs: usize,
/// the soft limit required to agree on consensus for the head block. (thundering herd protection)
pub(super) min_sum_soft_limit: u32,
/// how far behind the highest known block height we can be before we stop serving requests
pub(super) max_head_block_lag: U64,
/// how old our consensus head block we can be before we stop serving requests
/// calculated based on max_head_block_lag and averge block times
pub(super) max_head_block_age: Duration,
}
impl Web3Rpcs {
/// Spawn durable connections to multiple Web3 providers.
pub async fn spawn(
chain_id: u64,
max_head_block_lag: Option<U64>,
min_head_rpcs: usize,
min_sum_soft_limit: u32,
name: Cow<'static, str>,
watch_consensus_head_sender: Option<watch::Sender<Option<Web3ProxyBlock>>>,
) -> anyhow::Result<(
Arc<Self>,
Web3ProxyJoinHandle<()>,
watch::Receiver<Option<Arc<RankedRpcs>>>,
)> {
let (block_sender, block_receiver) = mpsc::unbounded_channel::<BlockAndRpc>();
// these blocks don't have full transactions, but they do have rather variable amounts of transaction hashes
// TODO: actual weighter on this
// TODO: time_to_idle instead?
let blocks_by_hash: BlocksByHashCache = CacheBuilder::new(1_000)
.name("blocks_by_hash")
.time_to_idle(Duration::from_secs(30 * 60))
.build();
// all block numbers are the same size, so no need for weigher
// TODO: limits from config
// TODO: time_to_idle instead?
let blocks_by_number = CacheBuilder::new(1_000)
.name("blocks_by_number")
.time_to_idle(Duration::from_secs(30 * 60))
.build();
let (watch_consensus_rpcs_sender, consensus_connections_watcher) =
watch::channel(Default::default());
// by_name starts empty. self.apply_server_configs will add to it
let by_name = RwLock::new(HashMap::new());
let max_head_block_lag = max_head_block_lag.unwrap_or(5.into());
let max_head_block_age =
average_block_interval(chain_id).mul_f32((max_head_block_lag.as_u64() * 10) as f32);
let connections = Arc::new(Self {
block_sender,
blocks_by_hash,
blocks_by_number,
by_name,
chain_id,
max_head_block_age,
max_head_block_lag,
min_synced_rpcs: min_head_rpcs,
min_sum_soft_limit,
name,
watch_head_block: watch_consensus_head_sender,
watch_ranked_rpcs: watch_consensus_rpcs_sender,
});
let handle = {
let connections = connections.clone();
tokio::spawn(connections.subscribe(block_receiver))
};
Ok((connections, handle, consensus_connections_watcher))
}
/// update the rpcs in this group
pub async fn apply_server_configs(
&self,
app: &Web3ProxyApp,
rpc_configs: HashMap<String, Web3RpcConfig>,
) -> Web3ProxyResult<()> {
// safety checks
if rpc_configs.len() < app.config.min_synced_rpcs {
// TODO: don't count disabled servers!
// TODO: include if this is balanced, private, or 4337
warn!(
"Only {}/{} rpcs! Add more rpcs or reduce min_synced_rpcs.",
rpc_configs.len(),
app.config.min_synced_rpcs
);
return Ok(());
}
// safety check on sum soft limit
// TODO: will need to think about this more once sum_soft_limit is dynamic
let sum_soft_limit = rpc_configs.values().fold(0, |acc, x| acc + x.soft_limit);
// TODO: require a buffer?
if sum_soft_limit < self.min_sum_soft_limit {
return Err(Web3ProxyError::NotEnoughSoftLimit {
available: sum_soft_limit,
needed: self.min_sum_soft_limit,
});
}
let chain_id = app.config.chain_id;
let block_interval = average_block_interval(chain_id);
let mut names_to_keep = vec![];
// turn configs into connections (in parallel)
let mut spawn_handles: FuturesUnordered<_> = rpc_configs
.into_iter()
.filter_map(|(server_name, server_config)| {
if server_config.disabled {
info!("{} is disabled", server_name);
return None;
}
let http_client = app.http_client.clone();
let vredis_pool = app.vredis_pool.clone();
let block_sender = if self.watch_head_block.is_some() {
Some(self.block_sender.clone())
} else {
None
};
let blocks_by_hash_cache = self.blocks_by_hash.clone();
debug!("spawning tasks for {}", server_name);
names_to_keep.push(server_name.clone());
let handle = tokio::spawn(server_config.spawn(
server_name,
vredis_pool,
chain_id,
block_interval,
http_client,
blocks_by_hash_cache,
block_sender,
self.max_head_block_age,
));
Some(handle)
})
.collect();
while let Some(x) = spawn_handles.next().await {
match x {
Ok(Ok((new_rpc, _handle))) => {
// web3 connection worked
let old_rpc = self.by_name.read().get(&new_rpc.name).map(Arc::clone);
// clean up the old rpc if it exists
if let Some(old_rpc) = old_rpc {
trace!("old_rpc: {}", old_rpc);
// if the old rpc was synced, wait for the new one to sync
if old_rpc.head_block.as_ref().unwrap().borrow().is_some() {
let mut new_head_receiver =
new_rpc.head_block.as_ref().unwrap().subscribe();
trace!("waiting for new {} connection to sync", new_rpc);
// TODO: maximum wait time
while new_head_receiver.borrow_and_update().is_none() {
if new_head_receiver.changed().await.is_err() {
break;
};
}
}
// new rpc is synced (or old one was not synced). update the local map
// make sure that any new requests use the new connection
self.by_name.write().insert(new_rpc.name.clone(), new_rpc);
// tell the old rpc to disconnect
if let Some(ref disconnect_sender) = old_rpc.disconnect_watch {
debug!("telling old {} to disconnect", old_rpc);
disconnect_sender.send_replace(true);
}
} else {
self.by_name.write().insert(new_rpc.name.clone(), new_rpc);
}
}
Ok(Err(err)) => {
// if we got an error here, the app can continue on
// TODO: include context about which connection failed
// TODO: retry automatically
error!("Unable to create connection. err={:?}", err);
}
Err(err) => {
// something actually bad happened. exit with an error
return Err(err.into());
}
}
}
// TODO: remove any RPCs that were part of the config, but are now removed
let active_names: Vec<_> = self.by_name.read().keys().cloned().collect();
for name in active_names {
if names_to_keep.contains(&name) {
continue;
}
if let Some(old_rpc) = self.by_name.write().remove(&name) {
if let Some(ref disconnect_sender) = old_rpc.disconnect_watch {
debug!("telling {} to disconnect. no longer needed", old_rpc);
disconnect_sender.send_replace(true);
}
}
}
let num_rpcs = self.len();
if num_rpcs < self.min_synced_rpcs {
return Err(Web3ProxyError::NotEnoughRpcs {
num_known: num_rpcs,
min_head_rpcs: self.min_synced_rpcs,
});
}
Ok(())
}
pub fn get(&self, conn_name: &str) -> Option<Arc<Web3Rpc>> {
self.by_name.read().get(conn_name).cloned()
}
pub fn len(&self) -> usize {
self.by_name.read().len()
}
pub fn is_empty(&self) -> bool {
self.by_name.read().is_empty()
}
/// TODO: rename to be consistent between "head" and "synced"
pub fn min_head_rpcs(&self) -> usize {
self.min_synced_rpcs
}
/// subscribe to blocks and transactions from all the backend rpcs.
/// blocks are processed by all the `Web3Rpc`s and then sent to the `block_receiver`
/// transaction ids from all the `Web3Rpc`s are deduplicated and forwarded to `pending_tx_sender`
async fn subscribe(
self: Arc<Self>,
block_receiver: mpsc::UnboundedReceiver<BlockAndRpc>,
) -> Web3ProxyResult<()> {
let mut futures = vec![];
// // setup the transaction funnel
// // it skips any duplicates (unless they are being orphaned)
// // fetches new transactions from the notifying rpc
// // forwards new transacitons to pending_tx_receipt_sender
// if let Some(pending_tx_sender) = pending_tx_sender.clone() {
// let clone = self.clone();
// let handle = tokio::task::spawn(async move {
// // TODO: set up this future the same as the block funnel
// while let Some((pending_tx_id, rpc)) =
// clone.pending_tx_id_receiver.write().await.recv().await
// {
// let f = clone.clone().process_incoming_tx_id(
// rpc,
// pending_tx_id,
// pending_tx_sender.clone(),
// );
// tokio::spawn(f);
// }
// Ok(())
// });
// futures.push(flatten_handle(handle));
// }
// setup the block funnel
if self.watch_head_block.is_some() {
let connections = Arc::clone(&self);
let handle = tokio::task::Builder::default()
.name("process_incoming_blocks")
.spawn(async move { connections.process_incoming_blocks(block_receiver).await })?;
futures.push(flatten_handle(handle));
}
if futures.is_empty() {
// no transaction or block subscriptions.
let handle = tokio::task::Builder::default()
.name("noop")
.spawn(async move {
loop {
sleep(Duration::from_secs(600)).await;
// TODO: "every interval, do a health check or disconnect the rpc"
}
})?;
futures.push(flatten_handle(handle));
}
if let Err(e) = try_join_all(futures).await {
error!(?self, "subscriptions over");
return Err(e);
}
info!(?self, "subscriptions over");
Ok(())
}
/// Send the same request to all the handles. Returning the most common success or most common error.
/// TODO: option to return the fastest response and handles for all the others instead?
pub async fn try_send_parallel_requests<P: JsonRpcParams>(
&self,
active_request_handles: Vec<OpenRequestHandle>,
method: &str,
params: &P,
// TODO: remove this box once i figure out how to do the options
) -> Result<Box<RawValue>, ProviderError> {
// TODO: if only 1 active_request_handles, do self.try_send_request?
// TODO: iter stream
let responses = active_request_handles
.into_iter()
.map(|active_request_handle| async move {
let result: Result<Box<RawValue>, _> =
active_request_handle.request(method, &json!(&params)).await;
result
})
.collect::<FuturesUnordered<_>>()
.collect::<Vec<Result<Box<RawValue>, ProviderError>>>()
.await;
// TODO: Strings are not great keys, but we can't use RawValue or ProviderError as keys because they don't implement Hash or Eq
let mut count_map: HashMap<String, _> = HashMap::new();
let mut counts: Counter<String> = Counter::new();
let mut any_ok_with_json_result = false;
for partial_response in responses {
if partial_response.is_ok() {
any_ok_with_json_result = true;
}
// TODO: better key!
let s = format!("{:?}", partial_response);
if count_map.get(&s).is_none() {
count_map.insert(s.clone(), partial_response);
}
counts.update([s]);
}
// return the most_common success if any. otherwise return the most_common error
for (most_common, _) in counts.most_common_ordered() {
let most_common = count_map
.remove(&most_common)
.expect("most_common key must exist");
match most_common {
Ok(x) => {
// return the most common success
return Ok(x);
}
Err(err) => {
if any_ok_with_json_result {
// the most common is an error, but there is an Ok in here somewhere. continue the loop to find it
continue;
}
return Err(err);
}
}
}
// TODO: what should we do if we get here? i don't think we will
unimplemented!("this shouldn't be possible")
}
async fn _best_available_rpc(
&self,
authorization: &Arc<Authorization>,
error_handler: Option<RequestErrorHandler>,
potential_rpcs: &[Arc<Web3Rpc>],
skip: &mut Vec<Arc<Web3Rpc>>,
) -> OpenRequestResult {
let mut earliest_retry_at = None;
for (rpc_a, rpc_b) in potential_rpcs.iter().circular_tuple_windows() {
trace!("{} vs {}", rpc_a, rpc_b);
// TODO: ties within X% to the server with the smallest block_data_limit
// faster rpc. backups always lose.
let faster_rpc = min_by_key(rpc_a, rpc_b, |x| (x.backup, x.weighted_peak_latency()));
trace!("winner: {}", faster_rpc);
// add to the skip list in case this one fails
skip.push(Arc::clone(faster_rpc));
// just because it has lower latency doesn't mean we are sure to get a connection. there might be rate limits
// TODO: what error_handler?
match faster_rpc
.try_request_handle(authorization, error_handler)
.await
{
Ok(OpenRequestResult::Handle(handle)) => {
trace!("opened handle: {}", faster_rpc);
return OpenRequestResult::Handle(handle);
}
Ok(OpenRequestResult::RetryAt(retry_at)) => {
trace!(
"retry on {} @ {}",
faster_rpc,
retry_at.duration_since(Instant::now()).as_secs_f32()
);
if earliest_retry_at.is_none() {
earliest_retry_at = Some(retry_at);
} else {
earliest_retry_at = earliest_retry_at.min(Some(retry_at));
}
}
Ok(OpenRequestResult::NotReady) => {
// TODO: log a warning? emit a stat?
trace!("best_rpc not ready: {}", faster_rpc);
}
Err(err) => {
trace!("No request handle for {}. err={:?}", faster_rpc, err)
}
}
}
if let Some(retry_at) = earliest_retry_at {
OpenRequestResult::RetryAt(retry_at)
} else {
OpenRequestResult::NotReady
}
}
pub async fn wait_for_best_rpc(
&self,
request_metadata: Option<&Arc<RequestMetadata>>,
skip_rpcs: &mut Vec<Arc<Web3Rpc>>,
min_block_needed: Option<&U64>,
max_block_needed: Option<&U64>,
max_wait: Option<Duration>,
error_handler: Option<RequestErrorHandler>,
) -> Web3ProxyResult<OpenRequestResult> {
let start = Instant::now();
let mut earliest_retry_at: Option<Instant> = None;
// TODO: pass db_conn to the "default" authorization for revert logging
let authorization = request_metadata
.and_then(|x| x.authorization.clone())
.unwrap_or_default();
let mut watch_ranked_rpcs = self.watch_ranked_rpcs.subscribe();
let mut potential_rpcs = Vec::new();
loop {
// TODO: need a change so that protected and 4337 rpcs set watch_consensus_rpcs on start
let ranked_rpcs: Option<Arc<RankedRpcs>> =
watch_ranked_rpcs.borrow_and_update().clone();
// first check everything that is synced
// even though we might be querying an old block that an unsynced server can handle,
// it is best to not send queries to a syncing server. that slows down sync and can bloat erigon's disk usage.
if let Some(ranked_rpcs) = ranked_rpcs {
potential_rpcs.extend(
ranked_rpcs
.all()
.iter()
.filter(|rpc| {
ranked_rpcs.rpc_will_work_now(
skip_rpcs,
min_block_needed,
max_block_needed,
rpc,
)
})
.cloned(),
);
if potential_rpcs.len() >= self.min_synced_rpcs {
// we have enough potential rpcs. try to load balance
potential_rpcs.sort_by_cached_key(|x| {
x.shuffle_for_load_balancing_on(max_block_needed.copied())
});
match self
._best_available_rpc(
&authorization,
error_handler,
&potential_rpcs,
skip_rpcs,
)
.await
{
OpenRequestResult::Handle(x) => return Ok(OpenRequestResult::Handle(x)),
OpenRequestResult::NotReady => {}
OpenRequestResult::RetryAt(retry_at) => {
if earliest_retry_at.is_none() {
earliest_retry_at = Some(retry_at);
} else {
earliest_retry_at = earliest_retry_at.min(Some(retry_at));
}
}
}
}
let waiting_for = min_block_needed.max(max_block_needed);
if let Some(max_wait) = max_wait {
match ranked_rpcs.should_wait_for_block(waiting_for, skip_rpcs) {
ShouldWaitForBlock::NeverReady => break,
ShouldWaitForBlock::Ready => {
if start.elapsed() > max_wait {
break;
}
}
ShouldWaitForBlock::Wait { .. } => select! {
_ = watch_ranked_rpcs.changed() => {
// no need to borrow_and_update because we do that at the top of the loop
},
_ = sleep_until(start + max_wait) => break,
},
}
}
} else if let Some(max_wait) = max_wait {
select! {
_ = watch_ranked_rpcs.changed() => {
// no need to borrow_and_update because we do that at the top of the loop
},
_ = sleep_until(start + max_wait) => break,
}
} else {
break;
}
// clear for the next loop
potential_rpcs.clear();
}
if let Some(request_metadata) = request_metadata {
request_metadata.no_servers.fetch_add(1, Ordering::AcqRel);
}
if let Some(retry_at) = earliest_retry_at {
// TODO: log the server that retry_at came from
warn!(
?skip_rpcs,
retry_in_s=?retry_at.duration_since(Instant::now()).as_secs_f32(),
"no servers in {} ready!",
self,
);
Ok(OpenRequestResult::RetryAt(retry_at))
} else {
warn!(?skip_rpcs, "no servers in {} ready!", self);
Ok(OpenRequestResult::NotReady)
}
}
/// get all rpc servers that are not rate limited
/// this prefers synced servers, but it will return servers even if they aren't fully in sync.
/// This is useful for broadcasting signed transactions.
// TODO: better type on this that can return an anyhow::Result
// TODO: this is broken
pub async fn all_connections(
&self,
request_metadata: Option<&Arc<RequestMetadata>>,
min_block_needed: Option<&U64>,
max_block_needed: Option<&U64>,
max_count: Option<usize>,
error_level: Option<RequestErrorHandler>,
) -> Result<Vec<OpenRequestHandle>, Option<Instant>> {
let mut earliest_retry_at = None;
// TODO: filter the rpcs with Ranked.will_work_now
let mut all_rpcs: Vec<_> = self.by_name.read().values().cloned().collect();
let mut max_count = if let Some(max_count) = max_count {
max_count
} else {
all_rpcs.len()
};
trace!("max_count: {}", max_count);
if max_count == 0 {
// TODO: return a future that resolves when we know a head block?
return Err(None);
}
let mut selected_rpcs = Vec::with_capacity(max_count);
// TODO: this sorts them all even though we probably won't need all of them. think about this more
all_rpcs.sort_by_cached_key(|x| x.sort_for_load_balancing_on(max_block_needed.copied()));
trace!("all_rpcs: {:#?}", all_rpcs);
let authorization = request_metadata
.and_then(|x| x.authorization.clone())
.unwrap_or_default();
for rpc in all_rpcs {
trace!("trying {}", rpc);
// TODO: use a helper function for these
if let Some(block_needed) = min_block_needed {
if !rpc.has_block_data(block_needed) {
trace!("{} is missing min_block_needed. skipping", rpc);
continue;
}
}
if let Some(block_needed) = max_block_needed {
if !rpc.has_block_data(block_needed) {
trace!("{} is missing max_block_needed. skipping", rpc);
continue;
}
}
// check rate limits and increment our connection counter
match rpc.try_request_handle(&authorization, error_level).await {
Ok(OpenRequestResult::RetryAt(retry_at)) => {
// this rpc is not available. skip it
trace!("{} is rate limited. skipping", rpc);
earliest_retry_at = earliest_retry_at.min(Some(retry_at));
}
Ok(OpenRequestResult::Handle(handle)) => {
trace!("{} is available", rpc);
selected_rpcs.push(handle);
max_count -= 1;
if max_count == 0 {
break;
}
}
Ok(OpenRequestResult::NotReady) => {
warn!("no request handle for {}", rpc)
}
Err(err) => {
warn!(?err, "error getting request handle for {}", rpc)
}
}
}
if !selected_rpcs.is_empty() {
return Ok(selected_rpcs);
}
// return the earliest retry_after (if no rpcs are synced, this will be None)
Err(earliest_retry_at)
}
pub async fn internal_request<P: JsonRpcParams, R: JsonRpcResultData>(
&self,
method: &str,
params: &P,
max_tries: Option<usize>,
max_wait: Option<Duration>,
) -> Web3ProxyResult<R> {
// TODO: no request_metadata means we won't have stats on this internal request.
self.request_with_metadata_and_retries(
method, params, None, max_tries, max_wait, None, None,
)
.await
}
/// Make a request with stat tracking.
#[allow(clippy::too_many_arguments)]
pub async fn request_with_metadata_and_retries<P: JsonRpcParams, R: JsonRpcResultData>(
&self,
method: &str,
params: &P,
request_metadata: Option<&Arc<RequestMetadata>>,
max_tries: Option<usize>,
max_wait: Option<Duration>,
min_block_needed: Option<&U64>,
max_block_needed: Option<&U64>,
) -> Web3ProxyResult<R> {
let mut tries = max_tries.unwrap_or(1);
let mut last_error = None;
while tries > 0 {
tries -= 1;
match self
.request_with_metadata(
method,
params,
request_metadata,
max_wait,
min_block_needed,
max_block_needed,
)
.await
{
Ok(x) => return Ok(x),
Err(Web3ProxyError::JsonRpcErrorData(err)) => {
// TODO: retry some of these? i think request_with_metadata is already smart enough though
return Err(err.into());
}
Err(err) => {
// TODO: only log params in dev
warn!(rpc=%self, %method, ?params, ?err, %tries, "retry-able error");
last_error = Some(err)
}
}
}
if let Some(err) = last_error {
return Err(err);
}
Err(anyhow::anyhow!("no response, but no error either. this is a bug").into())
}
/// Make a request with stat tracking.
pub async fn request_with_metadata<P: JsonRpcParams, R: JsonRpcResultData>(
&self,
method: &str,
params: &P,
request_metadata: Option<&Arc<RequestMetadata>>,
max_wait: Option<Duration>,
min_block_needed: Option<&U64>,
max_block_needed: Option<&U64>,
) -> Web3ProxyResult<R> {
let mut skip_rpcs = vec![];
let mut method_not_available_response = None;
let mut watch_consensus_rpcs = self.watch_ranked_rpcs.subscribe();
let start = Instant::now();
// set error_handler to Save. this might be overridden depending on the request_metadata.authorization
let error_handler = Some(RequestErrorHandler::Save);
let mut last_provider_error = None;
// TODO: the loop here feels somewhat redundant with the loop in best_available_rpc
loop {
if let Some(max_wait) = max_wait {
if start.elapsed() > max_wait {
trace!("max_wait exceeded");
break;
}
}
match self
.wait_for_best_rpc(
request_metadata,
&mut skip_rpcs,
min_block_needed,
max_block_needed,
max_wait,
error_handler,
)
.await?
{
OpenRequestResult::Handle(active_request_handle) => {
// save the rpc in case we get an error and want to retry on another server
// TODO: look at backend_requests instead
let rpc = active_request_handle.clone_connection();
if let Some(request_metadata) = request_metadata {
request_metadata.backend_requests.lock().push(rpc.clone());
}
let is_backup_response = rpc.backup;
match active_request_handle.request::<P, R>(method, params).await {
Ok(response) => {
// TODO: if there are multiple responses being aggregated, this will only use the last server's backup type
if let Some(request_metadata) = request_metadata {
request_metadata
.response_from_backup_rpc
.store(is_backup_response, Ordering::Release);
request_metadata
.user_error_response
.store(false, Ordering::Release);
request_metadata
.error_response
.store(false, Ordering::Release);
}
return Ok(response);
}
Err(error) => {
// TODO: if this is an error, do NOT return. continue to try on another server
let error = match JsonRpcErrorData::try_from(&error) {
Ok(x) => {
if let Some(request_metadata) = request_metadata {
request_metadata
.user_error_response
.store(true, Ordering::Release);
}
x
}
Err(err) => {
warn!(?err, "error from {}", rpc);
if let Some(request_metadata) = request_metadata {
request_metadata
.error_response
.store(true, Ordering::Release);
request_metadata
.user_error_response
.store(false, Ordering::Release);
}
last_provider_error = Some(error);
continue;
}
};
// some errors should be retried on other nodes
let error_msg = error.message.as_ref();
// different providers do different codes. check all of them
// TODO: there's probably more strings to add here
let rate_limit_substrings = ["limit", "exceeded", "quota usage"];
for rate_limit_substr in rate_limit_substrings {
if error_msg.contains(rate_limit_substr) {
if error_msg.contains("block size") {
// TODO: this message is likely wrong, but i can't find the actual one in my terminal now
// they hit an expected limit. return the error now
return Err(error.into());
} else if error_msg.contains("result on length") {
// this error contains "limit" but is not a rate limit error
// TODO: make the expected limit configurable
// TODO: parse the rate_limit_substr and only continue if it is < expected limit
if error_msg.contains("exceeding limit 2000000")
|| error_msg.ends_with(
"exceeding --rpc.returndata.limit 2000000",
)
{
// they hit our expected limit. return the error now
return Err(error.into());
} else {
// they hit a limit lower than what we expect. the server is misconfigured
error!(
%error_msg,
"unexpected result limit by {}",
skip_rpcs.last().unwrap(),
);
continue;
}
} else {
warn!(
%error_msg,
"rate limited by {}",
skip_rpcs.last().unwrap()
);
continue;
}
}
}
match error.code {
-32000 => {
// TODO: regex?
let retry_prefixes = [
"header not found",
"header for hash not found",
"missing trie node",
"node not started",
"RPC timeout",
];
for retry_prefix in retry_prefixes {
if error_msg.starts_with(retry_prefix) {
// TODO: too verbose
debug!("retrying on another server");
continue;
}
}
}
-32601 => {
let error_msg = error.message.as_ref();
// sometimes a provider does not support all rpc methods
// we check other connections rather than returning the error
// but sometimes the method is something that is actually unsupported,
// so we save the response here to return it later
// some providers look like this
if error_msg.starts_with("the method")
&& error_msg.ends_with("is not available")
{
method_not_available_response = Some(error);
continue;
}
// others look like this (this is the example in the official spec)
if error_msg == "Method not found" {
method_not_available_response = Some(error);
continue;
}
}
_ => {}
}
// let rpc = skip_rpcs
// .last()
// .expect("there must have been a provider if we got an error");
// TODO: emit a stat. if a server is getting skipped a lot, something is not right
// TODO: if we get a TrySendError, reconnect. wait why do we see a trysenderror on a dual provider? shouldn't it be using reqwest
// TODO! WRONG! ONLY SET RETRY_AT IF THIS IS A SERVER/CONNECTION ERROR. JSONRPC "error" is FINE
// trace!(
// "Backend server error on {}! Retrying {:?} on another. err={:?}",
// rpc,
// request,
// error,
// );
// if let Some(ref hard_limit_until) = rpc.hard_limit_until {
// let retry_at = Instant::now() + Duration::from_secs(1);
// hard_limit_until.send_replace(retry_at);
// }
return Err(error.into());
}
}
}
OpenRequestResult::RetryAt(retry_at) => {
// TODO: move this to a helper function
// sleep (TODO: with a lock?) until our rate limits should be available
// TODO: if a server catches up sync while we are waiting, we could stop waiting
warn!(
"All rate limits exceeded. waiting for change in synced servers or {:?}s",
retry_at.duration_since(Instant::now()).as_secs_f32()
);
// TODO: have a separate column for rate limited?
if let Some(request_metadata) = request_metadata {
request_metadata.no_servers.fetch_add(1, Ordering::AcqRel);
}
tokio::select! {
_ = sleep_until(retry_at) => {
trace!("slept!");
skip_rpcs.pop();
}
_ = watch_consensus_rpcs.changed() => {
watch_consensus_rpcs.borrow_and_update();
}
}
}
OpenRequestResult::NotReady => {
if let Some(request_metadata) = request_metadata {
request_metadata
.error_response
.store(true, Ordering::Release);
}
break;
}
}
}
if let Some(err) = method_not_available_response {
if let Some(request_metadata) = request_metadata {
request_metadata
.error_response
.store(false, Ordering::Release);
request_metadata
.user_error_response
.store(true, Ordering::Release);
}
// this error response is likely the user's fault
// TODO: emit a stat for unsupported methods. then we can know what there is demand for or if we are missing a feature
return Err(err.into());
}
if let Some(err) = last_provider_error {
return Err(err.into());
}
let num_conns = self.len();
let num_skipped = skip_rpcs.len();
let needed = min_block_needed.max(max_block_needed);
let head_block_num = watch_consensus_rpcs
.borrow()
.as_ref()
.map(|x| *x.head_block.number());
// TODO: error? warn? debug? trace?
if head_block_num.is_none() {
error!(
min=?min_block_needed,
max=?max_block_needed,
head=?head_block_num,
known=num_conns,
%method,
?params,
"No servers synced",
);
} else if head_block_num.as_ref() > needed {
// we have synced past the needed block
// TODO: log ranked rpcs
// TODO: only log params in development
error!(
min=?min_block_needed,
max=?max_block_needed,
head=?head_block_num,
known=%num_conns,
%method,
?params,
"No archive servers synced",
);
} else {
// TODO: only log params in development
// TODO: log ranked rpcs
error!(
min=?min_block_needed,
max=?max_block_needed,
head=?head_block_num,
skipped=%num_skipped,
known=%num_conns,
%method,
?params,
"Requested data is not available",
);
}
// TODO: what error code?
// cloudflare gives {"jsonrpc":"2.0","error":{"code":-32043,"message":"Requested data cannot be older than 128 blocks."},"id":1}
Err(JsonRpcErrorData {
message: "Requested data is not available".into(),
code: -32043,
data: None,
}
.into())
}
/// be sure there is a timeout on this or it might loop forever
#[allow(clippy::too_many_arguments)]
pub async fn try_send_all_synced_connections<P: JsonRpcParams>(
self: &Arc<Self>,
method: &str,
params: &P,
request_metadata: Option<&Arc<RequestMetadata>>,
min_block_needed: Option<&U64>,
max_block_needed: Option<&U64>,
max_wait: Option<Duration>,
error_level: Option<RequestErrorHandler>,
max_sends: Option<usize>,
) -> Web3ProxyResult<Box<RawValue>> {
let mut watch_consensus_rpcs = self.watch_ranked_rpcs.subscribe();
let start = Instant::now();
loop {
if let Some(max_wait) = max_wait {
if start.elapsed() > max_wait {
break;
}
}
match self
.all_connections(
request_metadata,
min_block_needed,
max_block_needed,
max_sends,
error_level,
)
.await
{
Ok(active_request_handles) => {
if let Some(request_metadata) = request_metadata {
let mut only_backups_used = true;
request_metadata.backend_requests.lock().extend(
active_request_handles.iter().map(|x| {
let rpc = x.clone_connection();
if !rpc.backup {
// TODO: its possible we serve from a synced connection though. think about this more
only_backups_used = false;
}
rpc
}),
);
request_metadata
.response_from_backup_rpc
.store(only_backups_used, Ordering::Release);
}
let x = self
.try_send_parallel_requests(active_request_handles, method, params)
.await?;
return Ok(x);
}
Err(None) => {
warn!(
?self,
?min_block_needed,
?max_block_needed,
"No servers in sync on! Retrying",
);
if let Some(request_metadata) = &request_metadata {
// TODO: if this times out, i think we drop this
request_metadata.no_servers.fetch_add(1, Ordering::AcqRel);
}
let max_sleep = if let Some(max_wait) = max_wait {
start + max_wait
} else {
break;
};
tokio::select! {
_ = sleep_until(max_sleep) => {
// rpcs didn't change and we have waited too long. break to return an error
warn!(?self, "timeout waiting for try_send_all_synced_connections!");
break;
},
_ = watch_consensus_rpcs.changed() => {
// consensus rpcs changed!
watch_consensus_rpcs.borrow_and_update();
// continue to try again
continue;
}
}
}
Err(Some(retry_at)) => {
if let Some(request_metadata) = &request_metadata {
request_metadata.no_servers.fetch_add(1, Ordering::AcqRel);
}
if let Some(max_wait) = max_wait {
if start.elapsed() > max_wait {
warn!(
?self,
"All rate limits exceeded. And sleeping would take too long"
);
break;
}
warn!("All rate limits exceeded. Sleeping");
// TODO: only make one of these sleep_untils
tokio::select! {
_ = sleep_until(start + max_wait) => {break}
_ = sleep_until(retry_at) => {}
_ = watch_consensus_rpcs.changed() => {
watch_consensus_rpcs.borrow_and_update();
}
}
continue;
} else {
warn!(?self, "all rate limits exceeded");
break;
}
}
}
}
Err(Web3ProxyError::NoServersSynced)
}
#[allow(clippy::too_many_arguments)]
pub async fn try_proxy_connection<P: JsonRpcParams, R: JsonRpcResultData>(
&self,
method: &str,
params: &P,
request_metadata: Option<&Arc<RequestMetadata>>,
max_tries: Option<usize>,
max_wait: Option<Duration>,
min_block_needed: Option<&U64>,
max_block_needed: Option<&U64>,
) -> Web3ProxyResult<R> {
let proxy_mode = request_metadata.map(|x| x.proxy_mode()).unwrap_or_default();
match proxy_mode {
ProxyMode::Debug | ProxyMode::Best => {
self.request_with_metadata_and_retries(
method,
params,
request_metadata,
max_tries,
max_wait,
min_block_needed,
max_block_needed,
)
.await
}
ProxyMode::Fastest(_x) => todo!("Fastest"),
ProxyMode::Versus => todo!("Versus"),
}
}
}
impl Display for Web3Rpcs {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.name)
}
}
impl fmt::Debug for Web3Rpcs {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// TODO: the default formatter takes forever to write. this is too quiet though
let consensus_rpcs = self.watch_ranked_rpcs.borrow().is_some();
f.debug_struct("Web3Rpcs")
.field("rpcs", &self.by_name)
.field("consensus_rpcs", &consensus_rpcs)
.finish_non_exhaustive()
}
}
impl Serialize for Web3Rpcs {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut state = serializer.serialize_struct("Web3Rpcs", 5)?;
{
let by_name = self.by_name.read();
let rpcs: Vec<&Arc<Web3Rpc>> = by_name.values().collect();
// TODO: coordinate with frontend team to rename "conns" to "rpcs"
state.serialize_field("conns", &rpcs)?;
}
{
let consensus_rpcs = self.watch_ranked_rpcs.borrow().clone();
// TODO: rename synced_connections to consensus_rpcs
if let Some(consensus_rpcs) = consensus_rpcs.as_ref() {
state.serialize_field("synced_connections", consensus_rpcs)?;
} else {
state.serialize_field("synced_connections", &None::<()>)?;
}
}
state.serialize_field(
"caches",
&(
MokaCacheSerializer(&self.blocks_by_hash),
MokaCacheSerializer(&self.blocks_by_number),
),
)?;
state.serialize_field(
"watch_consensus_rpcs_receivers",
&self.watch_ranked_rpcs.receiver_count(),
)?;
if let Some(ref x) = self.watch_head_block {
state.serialize_field("watch_consensus_head_receivers", &x.receiver_count())?;
} else {
state.serialize_field("watch_consensus_head_receivers", &None::<()>)?;
}
state.end()
}
}
mod tests {
#![allow(unused_imports)]
use super::*;
use crate::rpcs::blockchain::Web3ProxyBlock;
use crate::rpcs::consensus::ConsensusFinder;
use arc_swap::ArcSwap;
use ethers::types::H256;
use ethers::types::{Block, U256};
use latency::PeakEwmaLatency;
use moka::future::{Cache, CacheBuilder};
use std::time::{SystemTime, UNIX_EPOCH};
use tracing::trace;
#[cfg(test)]
fn new_peak_latency() -> PeakEwmaLatency {
PeakEwmaLatency::spawn(Duration::from_secs(1), 4, Duration::from_secs(1))
}
#[test_log::test(tokio::test)]
async fn test_sort_connections_by_sync_status() {
let block_0 = Block {
number: Some(0.into()),
hash: Some(H256::random()),
..Default::default()
};
let block_1 = Block {
number: Some(1.into()),
hash: Some(H256::random()),
parent_hash: block_0.hash.unwrap(),
..Default::default()
};
let block_2 = Block {
number: Some(2.into()),
hash: Some(H256::random()),
parent_hash: block_1.hash.unwrap(),
..Default::default()
};
let blocks: Vec<_> = [block_0, block_1, block_2]
.into_iter()
.map(|x| Web3ProxyBlock::try_new(Arc::new(x)).unwrap())
.collect();
let (tx_a, _) = watch::channel(None);
let (tx_b, _) = watch::channel(blocks.get(1).cloned());
let (tx_c, _) = watch::channel(blocks.get(2).cloned());
let (tx_d, _) = watch::channel(None);
let (tx_e, _) = watch::channel(blocks.get(1).cloned());
let (tx_f, _) = watch::channel(blocks.get(2).cloned());
let mut rpcs: Vec<_> = [
Web3Rpc {
name: "a".to_string(),
tier: 0.into(),
head_block: Some(tx_a),
peak_latency: Some(new_peak_latency()),
..Default::default()
},
Web3Rpc {
name: "b".to_string(),
tier: 0.into(),
head_block: Some(tx_b),
peak_latency: Some(new_peak_latency()),
..Default::default()
},
Web3Rpc {
name: "c".to_string(),
tier: 0.into(),
head_block: Some(tx_c),
peak_latency: Some(new_peak_latency()),
..Default::default()
},
Web3Rpc {
name: "d".to_string(),
tier: 1.into(),
head_block: Some(tx_d),
peak_latency: Some(new_peak_latency()),
..Default::default()
},
Web3Rpc {
name: "e".to_string(),
tier: 1.into(),
head_block: Some(tx_e),
peak_latency: Some(new_peak_latency()),
..Default::default()
},
Web3Rpc {
name: "f".to_string(),
tier: 1.into(),
head_block: Some(tx_f),
peak_latency: Some(new_peak_latency()),
..Default::default()
},
]
.into_iter()
.map(Arc::new)
.collect();
rpcs.sort_by_cached_key(|x| x.sort_for_load_balancing_on(None));
let names_in_sort_order: Vec<_> = rpcs.iter().map(|x| x.name.as_str()).collect();
// assert_eq!(names_in_sort_order, ["c", "b", "a", "f", "e", "d"]);
assert_eq!(names_in_sort_order, ["c", "f", "b", "e", "a", "d"]);
}
#[test_log::test(tokio::test)]
async fn test_server_selection_by_height() {
let now = chrono::Utc::now().timestamp().into();
let lagged_block = Block {
hash: Some(H256::random()),
number: Some(0.into()),
timestamp: now - 1,
..Default::default()
};
let head_block = Block {
hash: Some(H256::random()),
number: Some(1.into()),
parent_hash: lagged_block.hash.unwrap(),
timestamp: now,
..Default::default()
};
let lagged_block = Arc::new(lagged_block);
let head_block = Arc::new(head_block);
let block_data_limit = u64::MAX;
let (tx_synced, _) = watch::channel(None);
let head_rpc = Web3Rpc {
name: "synced".to_string(),
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: block_data_limit.into(),
head_block: Some(tx_synced),
peak_latency: Some(new_peak_latency()),
..Default::default()
};
let (tx_lagged, _) = watch::channel(None);
let lagged_rpc = Web3Rpc {
name: "lagged".to_string(),
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: block_data_limit.into(),
head_block: Some(tx_lagged),
peak_latency: Some(new_peak_latency()),
..Default::default()
};
assert!(!head_rpc.has_block_data(lagged_block.number.as_ref().unwrap()));
assert!(!head_rpc.has_block_data(head_block.number.as_ref().unwrap()));
assert!(!lagged_rpc.has_block_data(lagged_block.number.as_ref().unwrap()));
assert!(!lagged_rpc.has_block_data(head_block.number.as_ref().unwrap()));
let head_rpc = Arc::new(head_rpc);
let lagged_rpc = Arc::new(lagged_rpc);
let (block_sender, _block_receiver) = mpsc::unbounded_channel();
let (watch_ranked_rpcs, _watch_consensus_rpcs_receiver) = watch::channel(None);
let (watch_consensus_head_sender, _watch_consensus_head_receiver) = watch::channel(None);
let chain_id = 1;
let mut by_name = HashMap::new();
by_name.insert(head_rpc.name.clone(), head_rpc.clone());
by_name.insert(lagged_rpc.name.clone(), lagged_rpc.clone());
// TODO: make a Web3Rpcs::new
let rpcs = Web3Rpcs {
block_sender: block_sender.clone(),
by_name: RwLock::new(by_name),
chain_id,
name: "test".into(),
watch_head_block: Some(watch_consensus_head_sender),
watch_ranked_rpcs,
blocks_by_hash: CacheBuilder::new(100)
.time_to_live(Duration::from_secs(60))
.build(),
blocks_by_number: CacheBuilder::new(100)
.time_to_live(Duration::from_secs(60))
.build(),
// TODO: test max_head_block_age?
max_head_block_age: Duration::from_secs(60),
// TODO: test max_head_block_lag?
max_head_block_lag: 5.into(),
min_synced_rpcs: 1,
min_sum_soft_limit: 1,
};
let mut consensus_finder = ConsensusFinder::new(None, None);
consensus_finder
.process_block_from_rpc(&rpcs, None, lagged_rpc.clone())
.await
.expect(
"its lagged, but it should still be seen as consensus if its the first to report",
);
consensus_finder
.process_block_from_rpc(&rpcs, None, head_rpc.clone())
.await
.unwrap();
// no head block because the rpcs haven't communicated through their channels
assert!(rpcs.head_block_hash().is_none());
// all_backend_connections gives all non-backup servers regardless of sync status
assert_eq!(
rpcs.all_connections(None, None, None, None, None)
.await
.unwrap()
.len(),
2
);
// best_synced_backend_connection which servers to be synced with the head block should not find any nodes
let x = rpcs
.wait_for_best_rpc(
None,
&mut vec![],
Some(head_block.number.as_ref().unwrap()),
None,
Some(Duration::from_secs(0)),
Some(RequestErrorHandler::DebugLevel),
)
.await
.unwrap();
info!(?x);
assert!(matches!(x, OpenRequestResult::NotReady));
// add lagged blocks to the rpcs. both servers should be allowed
lagged_rpc
.send_head_block_result(
Ok(Some(lagged_block.clone())),
&block_sender,
&rpcs.blocks_by_hash,
)
.await
.unwrap();
// TODO: calling process_block_from_rpc and send_head_block_result seperate seems very fragile
consensus_finder
.process_block_from_rpc(
&rpcs,
Some(lagged_block.clone().try_into().unwrap()),
lagged_rpc.clone(),
)
.await
.unwrap();
head_rpc
.send_head_block_result(
Ok(Some(lagged_block.clone())),
&block_sender,
&rpcs.blocks_by_hash,
)
.await
.unwrap();
// TODO: this is fragile
consensus_finder
.process_block_from_rpc(
&rpcs,
Some(lagged_block.clone().try_into().unwrap()),
head_rpc.clone(),
)
.await
.unwrap();
// TODO: how do we spawn this and wait for it to process things? subscribe and watch consensus connections?
// rpcs.process_incoming_blocks(block_receiver, pending_tx_sender)
assert!(head_rpc.has_block_data(lagged_block.number.as_ref().unwrap()));
assert!(!head_rpc.has_block_data(head_block.number.as_ref().unwrap()));
assert!(lagged_rpc.has_block_data(lagged_block.number.as_ref().unwrap()));
assert!(!lagged_rpc.has_block_data(head_block.number.as_ref().unwrap()));
assert_eq!(rpcs.num_synced_rpcs(), 2);
// add head block to the rpcs. lagged_rpc should not be available
head_rpc
.send_head_block_result(
Ok(Some(head_block.clone())),
&block_sender,
&rpcs.blocks_by_hash,
)
.await
.unwrap();
// TODO: this is fragile
consensus_finder
.process_block_from_rpc(
&rpcs,
Some(head_block.clone().try_into().unwrap()),
head_rpc.clone(),
)
.await
.unwrap();
assert_eq!(rpcs.num_synced_rpcs(), 1);
assert!(head_rpc.has_block_data(lagged_block.number.as_ref().unwrap()));
assert!(head_rpc.has_block_data(head_block.number.as_ref().unwrap()));
assert!(lagged_rpc.has_block_data(lagged_block.number.as_ref().unwrap()));
assert!(!lagged_rpc.has_block_data(head_block.number.as_ref().unwrap()));
// TODO: make sure the handle is for the expected rpc
assert!(matches!(
rpcs.wait_for_best_rpc(
None,
&mut vec![],
None,
None,
Some(Duration::from_secs(0)),
None,
)
.await,
Ok(OpenRequestResult::Handle(_))
));
// TODO: make sure the handle is for the expected rpc
assert!(matches!(
rpcs.wait_for_best_rpc(
None,
&mut vec![],
Some(&0.into()),
None,
Some(Duration::from_secs(0)),
None,
)
.await,
Ok(OpenRequestResult::Handle(_))
));
// TODO: make sure the handle is for the expected rpc
assert!(matches!(
rpcs.wait_for_best_rpc(
None,
&mut vec![],
Some(&1.into()),
None,
Some(Duration::from_secs(0)),
None,
)
.await,
Ok(OpenRequestResult::Handle(_))
));
// future block should not get a handle
let future_rpc = rpcs
.wait_for_best_rpc(
None,
&mut vec![],
Some(&2.into()),
None,
Some(Duration::from_secs(0)),
None,
)
.await;
assert!(matches!(future_rpc, Ok(OpenRequestResult::NotReady)));
}
#[test_log::test(tokio::test)]
async fn test_server_selection_by_archive() {
let now = chrono::Utc::now().timestamp().into();
let head_block = Block {
hash: Some(H256::random()),
number: Some(1_000_000.into()),
parent_hash: H256::random(),
timestamp: now,
..Default::default()
};
let head_block: Web3ProxyBlock = Arc::new(head_block).try_into().unwrap();
let (tx_pruned, _) = watch::channel(Some(head_block.clone()));
let pruned_rpc = Web3Rpc {
name: "pruned".to_string(),
soft_limit: 3_000,
automatic_block_limit: false,
backup: false,
block_data_limit: 64.into(),
tier: 1.into(),
head_block: Some(tx_pruned),
..Default::default()
};
let (tx_archive, _) = watch::channel(Some(head_block.clone()));
let archive_rpc = Web3Rpc {
name: "archive".to_string(),
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: u64::MAX.into(),
tier: 2.into(),
head_block: Some(tx_archive),
..Default::default()
};
assert!(pruned_rpc.has_block_data(head_block.number()));
assert!(archive_rpc.has_block_data(head_block.number()));
assert!(!pruned_rpc.has_block_data(&1.into()));
assert!(archive_rpc.has_block_data(&1.into()));
let pruned_rpc = Arc::new(pruned_rpc);
let archive_rpc = Arc::new(archive_rpc);
let (block_sender, _) = mpsc::unbounded_channel();
let (watch_ranked_rpcs, _) = watch::channel(None);
let (watch_consensus_head_sender, _watch_consensus_head_receiver) = watch::channel(None);
let chain_id = 1;
let mut by_name = HashMap::new();
by_name.insert(pruned_rpc.name.clone(), pruned_rpc.clone());
by_name.insert(archive_rpc.name.clone(), archive_rpc.clone());
let rpcs = Web3Rpcs {
block_sender,
by_name: RwLock::new(by_name),
chain_id,
name: "test".into(),
watch_head_block: Some(watch_consensus_head_sender),
watch_ranked_rpcs,
blocks_by_hash: CacheBuilder::new(100)
.time_to_live(Duration::from_secs(120))
.build(),
blocks_by_number: CacheBuilder::new(100)
.time_to_live(Duration::from_secs(120))
.build(),
min_synced_rpcs: 1,
min_sum_soft_limit: 4_000,
max_head_block_age: Duration::from_secs(60),
max_head_block_lag: 5.into(),
};
let mut connection_heads = ConsensusFinder::new(None, None);
// min sum soft limit will require 2 servers
let x = connection_heads
.process_block_from_rpc(&rpcs, Some(head_block.clone()), pruned_rpc.clone())
.await
.unwrap();
assert!(!x);
assert_eq!(rpcs.num_synced_rpcs(), 0);
let x = connection_heads
.process_block_from_rpc(&rpcs, Some(head_block.clone()), archive_rpc.clone())
.await
.unwrap();
assert!(x);
assert_eq!(rpcs.num_synced_rpcs(), 2);
// best_synced_backend_connection requires servers to be synced with the head block
// TODO: test with and without passing the head_block.number?
let best_available_server = rpcs
.wait_for_best_rpc(
None,
&mut vec![],
Some(head_block.number()),
None,
Some(Duration::from_secs(0)),
None,
)
.await;
debug!("best_available_server: {:#?}", best_available_server);
assert!(matches!(
best_available_server.unwrap(),
OpenRequestResult::Handle(_)
));
let _best_available_server_from_none = rpcs
.wait_for_best_rpc(
None,
&mut vec![],
None,
None,
Some(Duration::from_secs(0)),
None,
)
.await;
// assert_eq!(best_available_server, best_available_server_from_none);
let best_archive_server = rpcs
.wait_for_best_rpc(
None,
&mut vec![],
Some(&1.into()),
None,
Some(Duration::from_secs(0)),
None,
)
.await;
match best_archive_server {
Ok(OpenRequestResult::Handle(x)) => {
assert_eq!(x.clone_connection().name, "archive".to_string())
}
x => {
error!("unexpected result: {:?}", x);
}
}
}
#[test_log::test(tokio::test)]
async fn test_all_connections() {
// TODO: use chrono, not SystemTime
let now: U256 = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_secs()
.into();
let block_1 = Block {
hash: Some(H256::random()),
number: Some(1_000_000.into()),
parent_hash: H256::random(),
timestamp: now,
..Default::default()
};
let block_2 = Block {
hash: Some(H256::random()),
number: Some(1_000_001.into()),
parent_hash: block_1.hash.unwrap(),
timestamp: now + 1,
..Default::default()
};
let block_1: Web3ProxyBlock = Arc::new(block_1).try_into().unwrap();
let block_2: Web3ProxyBlock = Arc::new(block_2).try_into().unwrap();
let (tx_mock_geth, _) = watch::channel(Some(block_1.clone()));
let (tx_mock_erigon_archive, _) = watch::channel(Some(block_2.clone()));
let mock_geth = Web3Rpc {
name: "mock_geth".to_string(),
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: 64.into(),
// tier: 0,
head_block: Some(tx_mock_geth),
peak_latency: Some(new_peak_latency()),
..Default::default()
};
let mock_erigon_archive = Web3Rpc {
name: "mock_erigon_archive".to_string(),
soft_limit: 1_000,
automatic_block_limit: false,
backup: false,
block_data_limit: u64::MAX.into(),
// tier: 1,
head_block: Some(tx_mock_erigon_archive),
peak_latency: Some(new_peak_latency()),
..Default::default()
};
assert!(mock_geth.has_block_data(block_1.number()));
assert!(mock_erigon_archive.has_block_data(block_1.number()));
assert!(!mock_geth.has_block_data(block_2.number()));
assert!(mock_erigon_archive.has_block_data(block_2.number()));
let mock_geth = Arc::new(mock_geth);
let mock_erigon_archive = Arc::new(mock_erigon_archive);
let (block_sender, _) = mpsc::unbounded_channel();
let (watch_ranked_rpcs, _) = watch::channel(None);
let (watch_consensus_head_sender, _watch_consensus_head_receiver) = watch::channel(None);
let chain_id = 1;
let mut by_name = HashMap::new();
by_name.insert(mock_geth.name.clone(), mock_geth.clone());
by_name.insert(
mock_erigon_archive.name.clone(),
mock_erigon_archive.clone(),
);
// TODO: make a Web3Rpcs::new
let rpcs = Web3Rpcs {
block_sender,
by_name: RwLock::new(by_name),
chain_id,
name: "test".into(),
watch_head_block: Some(watch_consensus_head_sender),
watch_ranked_rpcs,
blocks_by_hash: Cache::new(10_000),
blocks_by_number: Cache::new(10_000),
min_synced_rpcs: 1,
min_sum_soft_limit: 1_000,
max_head_block_age: Duration::from_secs(60),
max_head_block_lag: 5.into(),
};
let mut consensus_finder = ConsensusFinder::new(None, None);
consensus_finder
.process_block_from_rpc(&rpcs, Some(block_1.clone()), mock_geth.clone())
.await
.unwrap();
consensus_finder
.process_block_from_rpc(&rpcs, Some(block_2.clone()), mock_erigon_archive.clone())
.await
.unwrap();
assert_eq!(rpcs.num_synced_rpcs(), 1);
// best_synced_backend_connection requires servers to be synced with the head block
// TODO: test with and without passing the head_block.number?
let head_connections = rpcs
.all_connections(None, Some(block_2.number()), None, None, None)
.await;
debug!("head_connections: {:#?}", head_connections);
assert_eq!(
head_connections.unwrap().len(),
1,
"wrong number of connections"
);
let all_connections = rpcs
.all_connections(None, Some(block_1.number()), None, None, None)
.await;
debug!("all_connections: {:#?}", all_connections);
assert_eq!(
all_connections.unwrap().len(),
2,
"wrong number of connections"
);
let all_connections = rpcs.all_connections(None, None, None, None, None).await;
debug!("all_connections: {:#?}", all_connections);
assert_eq!(
all_connections.unwrap().len(),
2,
"wrong number of connections"
)
}
}
#[cfg(test)]
mod test {
use std::cmp::Reverse;
#[test]
fn test_block_num_sort() {
let test_vec = vec![
Reverse(Some(3)),
Reverse(Some(2)),
Reverse(Some(1)),
Reverse(None),
];
let mut sorted_vec = test_vec.clone();
sorted_vec.sort();
assert_eq!(test_vec, sorted_vec);
}
#[test]
fn test_bool_sort() {
let test_vec = vec![false, true];
let mut sorted_vec = test_vec.clone();
sorted_vec.sort();
assert_eq!(test_vec, sorted_vec);
}
}