sort on sync status
This commit is contained in:
parent
2edf0cf4b3
commit
9e457154a3
21
TODO.md
21
TODO.md
@ -1,3 +1,24 @@
|
|||||||
# Todo
|
# Todo
|
||||||
|
|
||||||
- [ ] tarpit ratelimiting at the start, but reject if incoming requests is super high
|
- [ ] tarpit ratelimiting at the start, but reject if incoming requests is super high
|
||||||
|
- [ ] thundering herd problem if we only allow a lag of 1 block. soft rate limits should help
|
||||||
|
|
||||||
|
# notes
|
||||||
|
its almost working. when i curl it, it doesn't work exactly right though
|
||||||
|
|
||||||
|
## first time:
|
||||||
|
|
||||||
|
```
|
||||||
|
thread 'tokio-runtime-worker' panicked at 'not implemented', src/provider_tiers.rs:142:13
|
||||||
|
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
|
||||||
|
```
|
||||||
|
|
||||||
|
I think this is not seeing any as in sync. not sure why else it would not have any not_until set.
|
||||||
|
I believe this is because we don't know the first block. we should force an update or something at the start
|
||||||
|
|
||||||
|
## second time:
|
||||||
|
"false"
|
||||||
|
|
||||||
|
it loses all the "jsonrpc" parts and just has the simple result. need to return a proper jsonrpc response
|
||||||
|
|
||||||
|
# TODO: add the backend server to the header
|
||||||
|
@ -14,8 +14,41 @@ pub type NewHead = (String, Block<TxHash>);
|
|||||||
pub type BlockWatcherSender = mpsc::UnboundedSender<NewHead>;
|
pub type BlockWatcherSender = mpsc::UnboundedSender<NewHead>;
|
||||||
pub type BlockWatcherReceiver = mpsc::UnboundedReceiver<NewHead>;
|
pub type BlockWatcherReceiver = mpsc::UnboundedReceiver<NewHead>;
|
||||||
|
|
||||||
|
#[derive(Eq)]
|
||||||
|
// TODO: ethers has a similar SyncingStatus
|
||||||
|
pub enum SyncStatus {
|
||||||
|
Synced(u64),
|
||||||
|
Behind(u64),
|
||||||
|
Unknown,
|
||||||
|
}
|
||||||
|
|
||||||
|
// impl Ord for SyncStatus {
|
||||||
|
// fn cmp(&self, other: &Self) -> cmp::Ordering {
|
||||||
|
// self.height.cmp(&other.height)
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// impl PartialOrd for SyncStatus {
|
||||||
|
// fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
|
||||||
|
// Some(self.cmp(other))
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
impl PartialEq for SyncStatus {
|
||||||
|
fn eq(&self, other: &Self) -> bool {
|
||||||
|
match (self, other) {
|
||||||
|
(Self::Synced(a), Self::Synced(b)) => a == b,
|
||||||
|
(Self::Unknown, Self::Unknown) => true,
|
||||||
|
(Self::Behind(a), Self::Behind(b)) => a == b,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct BlockWatcher {
|
pub struct BlockWatcher {
|
||||||
sender: BlockWatcherSender,
|
sender: BlockWatcherSender,
|
||||||
|
/// parking_lot::Mutex is supposed to be faster, but we only lock this once, so its fine
|
||||||
receiver: Mutex<BlockWatcherReceiver>,
|
receiver: Mutex<BlockWatcherReceiver>,
|
||||||
block_numbers: DashMap<String, u64>,
|
block_numbers: DashMap<String, u64>,
|
||||||
head_block_number: AtomicU64,
|
head_block_number: AtomicU64,
|
||||||
@ -37,24 +70,30 @@ impl BlockWatcher {
|
|||||||
self.sender.clone()
|
self.sender.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn is_synced(&self, rpc: String, allowed_lag: u64) -> anyhow::Result<bool> {
|
pub fn sync_status(&self, rpc: &str, allowed_lag: u64) -> SyncStatus {
|
||||||
match (
|
match (
|
||||||
self.head_block_number.load(atomic::Ordering::SeqCst),
|
self.head_block_number.load(atomic::Ordering::SeqCst),
|
||||||
self.block_numbers.get(&rpc),
|
self.block_numbers.get(rpc),
|
||||||
) {
|
) {
|
||||||
(0, _) => Ok(false),
|
(0, _) => SyncStatus::Unknown,
|
||||||
(_, None) => Ok(false),
|
(_, None) => SyncStatus::Unknown,
|
||||||
(head_block_number, Some(rpc_block_number)) => {
|
(head_block_number, Some(rpc_block_number)) => {
|
||||||
match head_block_number.cmp(&rpc_block_number) {
|
match head_block_number.cmp(&rpc_block_number) {
|
||||||
cmp::Ordering::Equal => Ok(true),
|
cmp::Ordering::Equal => SyncStatus::Synced(0),
|
||||||
cmp::Ordering::Greater => {
|
cmp::Ordering::Greater => {
|
||||||
// this probably won't happen, but it might if the block arrives at the exact wrong time
|
// this probably won't happen, but it might if the block arrives at the exact wrong time
|
||||||
Ok(true)
|
// TODO: should this be negative?
|
||||||
|
SyncStatus::Synced(0)
|
||||||
}
|
}
|
||||||
cmp::Ordering::Less => {
|
cmp::Ordering::Less => {
|
||||||
// allow being some behind
|
// allow being some behind
|
||||||
let lag = head_block_number - *rpc_block_number;
|
let lag = head_block_number - *rpc_block_number;
|
||||||
Ok(lag <= allowed_lag)
|
|
||||||
|
if lag <= allowed_lag {
|
||||||
|
SyncStatus::Synced(lag)
|
||||||
|
} else {
|
||||||
|
SyncStatus::Behind(lag)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -90,6 +129,7 @@ impl BlockWatcher {
|
|||||||
let head_number = self.head_block_number.load(atomic::Ordering::SeqCst);
|
let head_number = self.head_block_number.load(atomic::Ordering::SeqCst);
|
||||||
|
|
||||||
let label_slow_heads = if head_number == 0 {
|
let label_slow_heads = if head_number == 0 {
|
||||||
|
// first block seen
|
||||||
self.head_block_number
|
self.head_block_number
|
||||||
.swap(new_block_number, atomic::Ordering::SeqCst);
|
.swap(new_block_number, atomic::Ordering::SeqCst);
|
||||||
"+".to_string()
|
"+".to_string()
|
||||||
@ -98,7 +138,7 @@ impl BlockWatcher {
|
|||||||
// TODO: alert if there is a large chain split?
|
// TODO: alert if there is a large chain split?
|
||||||
match (new_block_number).cmp(&head_number) {
|
match (new_block_number).cmp(&head_number) {
|
||||||
cmp::Ordering::Equal => {
|
cmp::Ordering::Equal => {
|
||||||
// this block is saved
|
// this block is already saved as the head
|
||||||
"".to_string()
|
"".to_string()
|
||||||
}
|
}
|
||||||
cmp::Ordering::Greater => {
|
cmp::Ordering::Greater => {
|
||||||
@ -108,6 +148,7 @@ impl BlockWatcher {
|
|||||||
"+".to_string()
|
"+".to_string()
|
||||||
}
|
}
|
||||||
cmp::Ordering::Less => {
|
cmp::Ordering::Less => {
|
||||||
|
// this rpc is behind
|
||||||
let lag = new_block_number as i64 - head_number as i64;
|
let lag = new_block_number as i64 - head_number as i64;
|
||||||
lag.to_string()
|
lag.to_string()
|
||||||
}
|
}
|
||||||
|
@ -116,7 +116,7 @@ impl Web3ProxyApp {
|
|||||||
let read_lock = self.private_rpcs_ratelimiter_lock.read().await;
|
let read_lock = self.private_rpcs_ratelimiter_lock.read().await;
|
||||||
|
|
||||||
match private_rpcs
|
match private_rpcs
|
||||||
.get_upstream_servers(self.block_watcher.clone())
|
.get_upstream_servers(1, self.block_watcher.clone())
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(upstream_servers) => {
|
Ok(upstream_servers) => {
|
||||||
@ -167,7 +167,7 @@ impl Web3ProxyApp {
|
|||||||
|
|
||||||
for balanced_rpcs in self.balanced_rpc_tiers.iter() {
|
for balanced_rpcs in self.balanced_rpc_tiers.iter() {
|
||||||
match balanced_rpcs
|
match balanced_rpcs
|
||||||
.next_upstream_server(self.block_watcher.clone())
|
.next_upstream_server(1, self.block_watcher.clone())
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(upstream_server) => {
|
Ok(upstream_server) => {
|
||||||
|
@ -10,7 +10,7 @@ use tracing::{info, warn};
|
|||||||
use crate::block_watcher::BlockWatcherSender;
|
use crate::block_watcher::BlockWatcherSender;
|
||||||
|
|
||||||
// TODO: instead of an enum, I tried to use Box<dyn Provider>, but hit https://github.com/gakonst/ethers-rs/issues/592
|
// TODO: instead of an enum, I tried to use Box<dyn Provider>, but hit https://github.com/gakonst/ethers-rs/issues/592
|
||||||
#[derive(From)]
|
#[derive(From, Debug)]
|
||||||
pub enum Web3Provider {
|
pub enum Web3Provider {
|
||||||
Http(ethers::providers::Provider<ethers::providers::Http>),
|
Http(ethers::providers::Provider<ethers::providers::Http>),
|
||||||
Ws(ethers::providers::Provider<ethers::providers::Ws>),
|
Ws(ethers::providers::Provider<ethers::providers::Ws>),
|
||||||
@ -72,6 +72,7 @@ impl Web3Provider {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// An active connection to a Web3Rpc
|
/// An active connection to a Web3Rpc
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct Web3Connection {
|
pub struct Web3Connection {
|
||||||
/// keep track of currently open requests. We sort on this
|
/// keep track of currently open requests. We sort on this
|
||||||
active_requests: u32,
|
active_requests: u32,
|
||||||
|
@ -5,11 +5,13 @@ use governor::middleware::NoOpMiddleware;
|
|||||||
use governor::state::{InMemoryState, NotKeyed};
|
use governor::state::{InMemoryState, NotKeyed};
|
||||||
use governor::NotUntil;
|
use governor::NotUntil;
|
||||||
use governor::RateLimiter;
|
use governor::RateLimiter;
|
||||||
|
use std::cmp;
|
||||||
use std::num::NonZeroU32;
|
use std::num::NonZeroU32;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use tokio::sync::RwLock;
|
use tokio::sync::RwLock;
|
||||||
|
use tracing::{info, instrument};
|
||||||
|
|
||||||
use crate::block_watcher::BlockWatcher;
|
use crate::block_watcher::{BlockWatcher, SyncStatus};
|
||||||
use crate::provider::Web3Connection;
|
use crate::provider::Web3Connection;
|
||||||
|
|
||||||
type Web3RateLimiter =
|
type Web3RateLimiter =
|
||||||
@ -20,9 +22,10 @@ type Web3RateLimiterMap = DashMap<String, Web3RateLimiter>;
|
|||||||
pub type Web3ConnectionMap = DashMap<String, Web3Connection>;
|
pub type Web3ConnectionMap = DashMap<String, Web3Connection>;
|
||||||
|
|
||||||
/// Load balance to the rpc
|
/// Load balance to the rpc
|
||||||
|
#[derive(Debug)]
|
||||||
pub struct Web3ProviderTier {
|
pub struct Web3ProviderTier {
|
||||||
/// RPC urls sorted by active requests
|
/// RPC urls sorted by active requests
|
||||||
/// TODO: what type for the rpc?
|
/// TODO: what type for the rpc? i think we want this to be the key for the provider and not the provider itself
|
||||||
rpcs: RwLock<Vec<String>>,
|
rpcs: RwLock<Vec<String>>,
|
||||||
connections: Arc<Web3ConnectionMap>,
|
connections: Arc<Web3ConnectionMap>,
|
||||||
ratelimiters: Web3RateLimiterMap,
|
ratelimiters: Web3RateLimiterMap,
|
||||||
@ -72,14 +75,66 @@ impl Web3ProviderTier {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// get the best available rpc server
|
/// get the best available rpc server
|
||||||
|
#[instrument]
|
||||||
pub async fn next_upstream_server(
|
pub async fn next_upstream_server(
|
||||||
&self,
|
&self,
|
||||||
|
allowed_lag: u64,
|
||||||
block_watcher: Arc<BlockWatcher>,
|
block_watcher: Arc<BlockWatcher>,
|
||||||
) -> Result<String, NotUntil<QuantaInstant>> {
|
) -> Result<String, NotUntil<QuantaInstant>> {
|
||||||
let mut balanced_rpcs = self.rpcs.write().await;
|
let mut available_rpcs = self.rpcs.write().await;
|
||||||
|
|
||||||
// sort rpcs by their active connections
|
// sort rpcs by their active connections
|
||||||
balanced_rpcs.sort_unstable_by(|a, b| {
|
available_rpcs.sort_unstable_by(|a, b| {
|
||||||
|
self.connections
|
||||||
|
.get(a)
|
||||||
|
.unwrap()
|
||||||
|
.cmp(&self.connections.get(b).unwrap())
|
||||||
|
});
|
||||||
|
|
||||||
|
// sort rpcs by their block height
|
||||||
|
available_rpcs.sort_unstable_by(|a, b| {
|
||||||
|
let a_synced = block_watcher.sync_status(a, allowed_lag);
|
||||||
|
let b_synced = block_watcher.sync_status(b, allowed_lag);
|
||||||
|
|
||||||
|
match (a_synced, b_synced) {
|
||||||
|
(SyncStatus::Synced(a), SyncStatus::Synced(b)) => {
|
||||||
|
if a != b {
|
||||||
|
return a.cmp(&b);
|
||||||
|
}
|
||||||
|
// else they are equal and we want to compare on active connections
|
||||||
|
}
|
||||||
|
(SyncStatus::Synced(_), SyncStatus::Unknown) => {
|
||||||
|
return cmp::Ordering::Greater;
|
||||||
|
}
|
||||||
|
(SyncStatus::Unknown, SyncStatus::Synced(_)) => {
|
||||||
|
return cmp::Ordering::Less;
|
||||||
|
}
|
||||||
|
(SyncStatus::Unknown, SyncStatus::Unknown) => {
|
||||||
|
// neither rpc is synced
|
||||||
|
// this means neither will have connections
|
||||||
|
return cmp::Ordering::Equal;
|
||||||
|
}
|
||||||
|
(SyncStatus::Synced(_), SyncStatus::Behind(_)) => {
|
||||||
|
return cmp::Ordering::Greater;
|
||||||
|
}
|
||||||
|
(SyncStatus::Behind(_), SyncStatus::Synced(_)) => {
|
||||||
|
return cmp::Ordering::Less;
|
||||||
|
}
|
||||||
|
(SyncStatus::Behind(_), SyncStatus::Unknown) => {
|
||||||
|
return cmp::Ordering::Greater;
|
||||||
|
}
|
||||||
|
(SyncStatus::Behind(a), SyncStatus::Behind(b)) => {
|
||||||
|
if a != b {
|
||||||
|
return a.cmp(&b);
|
||||||
|
}
|
||||||
|
// else they are equal and we want to compare on active connections
|
||||||
|
}
|
||||||
|
(SyncStatus::Unknown, SyncStatus::Behind(_)) => {
|
||||||
|
return cmp::Ordering::Less;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// sort on active connections
|
||||||
self.connections
|
self.connections
|
||||||
.get(a)
|
.get(a)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
@ -88,16 +143,19 @@ impl Web3ProviderTier {
|
|||||||
|
|
||||||
let mut earliest_not_until = None;
|
let mut earliest_not_until = None;
|
||||||
|
|
||||||
for selected_rpc in balanced_rpcs.iter() {
|
for selected_rpc in available_rpcs.iter() {
|
||||||
// check current block number
|
// check current block number
|
||||||
if !block_watcher
|
// TODO: i don't like that we fetched sync_status above and then do it again here. cache?
|
||||||
.is_synced(selected_rpc.clone(), 3)
|
if let SyncStatus::Synced(_) = block_watcher.sync_status(selected_rpc, allowed_lag) {
|
||||||
.await
|
// rpc is synced
|
||||||
.expect("checking is_synced failed")
|
} else {
|
||||||
{
|
|
||||||
// skip this rpc because it is not synced
|
// skip this rpc because it is not synced
|
||||||
// TODO: make a NotUntil here?
|
// TODO: make a NotUntil here?
|
||||||
continue;
|
// TODO: include how many blocks behind
|
||||||
|
// TODO: better log
|
||||||
|
info!("{} is not synced", selected_rpc);
|
||||||
|
// we sorted on block height. so if this one isn't synced, none of the later ones will be either
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check rate limits
|
// check rate limits
|
||||||
@ -109,6 +167,9 @@ impl Web3ProviderTier {
|
|||||||
Err(not_until) => {
|
Err(not_until) => {
|
||||||
// rate limit failed
|
// rate limit failed
|
||||||
// save the smallest not_until. if nothing succeeds, return an Err with not_until in it
|
// save the smallest not_until. if nothing succeeds, return an Err with not_until in it
|
||||||
|
// TODO: use tracing better
|
||||||
|
info!("Exhausted rate limit on {}: {}", selected_rpc, not_until);
|
||||||
|
|
||||||
if earliest_not_until.is_none() {
|
if earliest_not_until.is_none() {
|
||||||
earliest_not_until = Some(not_until);
|
earliest_not_until = Some(not_until);
|
||||||
} else {
|
} else {
|
||||||
@ -146,6 +207,7 @@ impl Web3ProviderTier {
|
|||||||
/// get all available rpc servers
|
/// get all available rpc servers
|
||||||
pub async fn get_upstream_servers(
|
pub async fn get_upstream_servers(
|
||||||
&self,
|
&self,
|
||||||
|
allowed_lag: u64,
|
||||||
block_watcher: Arc<BlockWatcher>,
|
block_watcher: Arc<BlockWatcher>,
|
||||||
) -> Result<Vec<String>, NotUntil<QuantaInstant>> {
|
) -> Result<Vec<String>, NotUntil<QuantaInstant>> {
|
||||||
let mut earliest_not_until = None;
|
let mut earliest_not_until = None;
|
||||||
@ -153,12 +215,9 @@ impl Web3ProviderTier {
|
|||||||
let mut selected_rpcs = vec![];
|
let mut selected_rpcs = vec![];
|
||||||
|
|
||||||
for selected_rpc in self.rpcs.read().await.iter() {
|
for selected_rpc in self.rpcs.read().await.iter() {
|
||||||
// check that the server is synced
|
if let SyncStatus::Synced(_) = block_watcher.sync_status(selected_rpc, allowed_lag) {
|
||||||
if !block_watcher
|
// rpc is synced
|
||||||
.is_synced(selected_rpc.clone(), 1)
|
} else {
|
||||||
.await
|
|
||||||
.expect("checking is_synced failed")
|
|
||||||
{
|
|
||||||
// skip this rpc because it is not synced
|
// skip this rpc because it is not synced
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user