remove allowed lag

Bryan Stitt 2023-01-19 03:05:39 -08:00
parent 2d5d115d6f
commit 52a9ba604c
6 changed files with 74 additions and 147 deletions


@@ -307,6 +307,7 @@ These are not yet ordered. There might be duplicates. We might not actually need
 - [x] improve rate limiting on websockets
 - [x] retry another server if we get a jsonrpc response error about rate limits
 - [x] major refactor to only use backup servers when absolutely necessary
+- [x] remove allowed lag
 - [-] proxy mode for benchmarking all backends
 - [-] proxy mode for sending to multiple backends
 - [-] let users choose a % of reverts to log (or maybe x/second). someone like curve logging all reverts will be a BIG database very quickly


@@ -190,7 +190,6 @@ pub struct Web3ProxyApp {
     head_block_receiver: watch::Receiver<ArcBlock>,
     pending_tx_sender: broadcast::Sender<TxStatus>,
     pub config: AppConfig,
-    pub allowed_lag: u64,
     pub db_conn: Option<sea_orm::DatabaseConnection>,
     pub db_replica: Option<DatabaseReplica>,
     /// prometheus metrics
@@ -687,20 +686,8 @@ impl Web3ProxyApp {
             .time_to_idle(Duration::from_secs(120))
             .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default());
 
-        // TODO: get this out of the toml instead
-        let allowed_lag = match top_config.app.chain_id {
-            1 => 60,
-            137 => 10,
-            250 => 10,
-            _ => {
-                warn!("defaulting allowed lag to 60");
-                60
-            }
-        };
-
         let app = Self {
             config: top_config.app,
-            allowed_lag,
             balanced_rpcs,
             private_rpcs,
             response_cache,
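
The deleted TODO above suggested sourcing the lag threshold from the TOML config rather than hard-coding per-chain defaults. A minimal sketch of that approach with serde, assuming the `serde` (with `derive`) and `toml` crates; the struct and field names here are illustrative, not the project's actual schema:

use serde::Deserialize;

// hypothetical config fragment; not the project's real AppConfig
#[derive(Deserialize)]
struct LagConfig {
    chain_id: u64,
    /// seconds a head block may trail before the node counts as lagged.
    /// falls back to 60 when the TOML omits the key, matching the old default.
    #[serde(default = "default_allowed_lag")]
    allowed_lag: u64,
}

fn default_allowed_lag() -> u64 {
    60
}

fn main() {
    // an explicit value wins
    let cfg: LagConfig = toml::from_str("chain_id = 137\nallowed_lag = 10").unwrap();
    assert_eq!(cfg.allowed_lag, 10);

    // an omitted key falls back to the serde default
    let cfg: LagConfig = toml::from_str("chain_id = 1").unwrap();
    assert_eq!(cfg.allowed_lag, 60);
}
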
@@ -1432,7 +1419,6 @@ impl Web3ProxyApp {
             .balanced_rpcs
             .try_proxy_connection(
                 proxy_mode,
-                self.allowed_lag,
                 &authorization,
                 request,
                 Some(&request_metadata),
@@ -1459,7 +1445,6 @@ impl Web3ProxyApp {
         self.balanced_rpcs
             .try_proxy_connection(
                 proxy_mode,
-                self.allowed_lag,
                 &authorization,
                 request,
                 Some(&request_metadata),


@@ -223,7 +223,6 @@ impl Web3ConnectionConfig {
     pub async fn spawn(
         self,
         name: String,
-        allowed_lag: u64,
         db_conn: Option<DatabaseConnection>,
         redis_pool: Option<redis_rate_limiter::RedisPool>,
         chain_id: u64,
@@ -262,7 +261,6 @@ impl Web3ConnectionConfig {
         Web3Connection::spawn(
             name,
-            allowed_lag,
             self.display_name,
             chain_id,
             db_conn,


@@ -78,11 +78,6 @@ impl SavedBlock {
     pub fn number(&self) -> U64 {
         self.block.number.expect("saved blocks must have a number")
     }
-
-    /// When the block was received, this node was still syncing
-    pub fn syncing(&self, allowed_lag: u64) -> bool {
-        self.age > allowed_lag
-    }
 }
 
 impl From<ArcBlock> for SavedBlock {
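
The removed `syncing` helper classified a node by the wall-clock age of its head block; a TODO removed further down notes that the 60-second cutoff was making the Polygon nodes look permanently lagged and suggests counting blocks instead. A sketch of that block-count alternative with simplified types; `lagged` and `MAX_BLOCKS_BEHIND` are hypothetical names, not part of this codebase:

/// hypothetical: how many blocks behind the consensus head is tolerable
const MAX_BLOCKS_BEHIND: u64 = 4;

/// simplified stand-in for the proxy's SavedBlock
struct HeadBlock {
    number: u64,
}

impl HeadBlock {
    /// measure lag in blocks against the consensus head instead of seconds,
    /// so chains with slow or irregular block times are not misclassified
    fn lagged(&self, consensus_head: u64) -> bool {
        consensus_head.saturating_sub(self.number) > MAX_BLOCKS_BEHIND
    }
}

fn main() {
    let node = HeadBlock { number: 16_450_000 };
    assert!(!node.lagged(16_450_002)); // 2 blocks behind: usable
    assert!(node.lagged(16_450_010)); // 10 blocks behind: skip it
}
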
@@ -172,7 +167,7 @@ impl Web3Connections {
         // TODO: request_metadata? maybe we should put it in the authorization?
-        // TODO: don't hard code allowed lag
         let response = self
-            .try_send_best_consensus_head_connection(60, authorization, request, None, None)
+            .try_send_best_consensus_head_connection(authorization, request, None, None)
             .await?;
 
         let block = response.result.context("failed fetching block")?;
@@ -248,7 +243,7 @@ impl Web3Connections {
         // TODO: if error, retry?
         // TODO: request_metadata or authorization?
         let response = self
-            .try_send_best_consensus_head_connection(60, authorization, request, None, Some(num))
+            .try_send_best_consensus_head_connection(authorization, request, None, Some(num))
             .await?;
 
         let raw_block = response.result.context("no block result")?;


@@ -63,7 +63,6 @@ pub struct Web3Connection {
     pub name: String,
     pub display_name: Option<String>,
     pub db_conn: Option<DatabaseConnection>,
-    pub(super) allowed_lag: u64,
     /// TODO: can we get this from the provider? do we even need it?
     pub(super) url: String,
     /// Some connections use an http_client. we keep a clone for reconnecting
@@ -101,7 +100,6 @@ impl Web3Connection {
     #[allow(clippy::too_many_arguments)]
     pub async fn spawn(
         name: String,
-        allowed_lag: u64,
         display_name: Option<String>,
         chain_id: u64,
         db_conn: Option<DatabaseConnection>,
@@ -140,7 +138,6 @@ impl Web3Connection {
         let new_connection = Self {
             name,
-            allowed_lag,
             db_conn: db_conn.clone(),
             display_name,
             http_client,
@@ -195,25 +192,7 @@ impl Web3Connection {
             return Ok(None);
         }
 
-        // check if we are synced
-        let head_block: ArcBlock = self
-            .wait_for_request_handle(authorization, Duration::from_secs(30), true)
-            .await?
-            .request::<_, Option<_>>(
-                "eth_getBlockByNumber",
-                &json!(("latest", false)),
-                // error here are expected, so keep the level low
-                Level::Warn.into(),
-            )
-            .await?
-            .context("no block during check_block_data_limit!")?;
-
-        if SavedBlock::from(head_block).syncing(60) {
-            // if the node is syncing, we can't check its block data limit
-            return Ok(None);
-        }
-
-        // TODO: add SavedBlock to self? probably best not to. we might not get marked Ready
+        // TODO: check eth_syncing. if it is not false, return Ok(None)
 
         let mut limit = None;
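
The replacement TODO says to ask the node directly via `eth_syncing` instead of inferring sync state from head-block age. A minimal sketch of that check, assuming the `ethers`, `tokio`, `anyhow`, and `serde_json` crates (the surrounding types suggest the project already builds on ethers); the endpoint URL is a placeholder:

use ethers::providers::{Http, Provider};

/// eth_syncing returns the JSON literal `false` once a node is synced and a
/// progress object while it is catching up, so anything that is not `false`
/// means the node is still syncing.
async fn is_synced(provider: &Provider<Http>) -> anyhow::Result<bool> {
    let status: serde_json::Value = provider.request("eth_syncing", ()).await?;
    Ok(status == serde_json::Value::Bool(false))
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let provider = Provider::<Http>::try_from("http://localhost:8545")?;

    if !is_synced(&provider).await? {
        // mirror the TODO: a syncing node cannot be probed for its block data limit
        println!("node is still syncing; returning early");
        return Ok(());
    }

    println!("node reports synced");
    Ok(())
}
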
@@ -296,27 +275,10 @@ impl Web3Connection {
         self.block_data_limit.load(atomic::Ordering::Acquire).into()
     }
 
-    pub fn syncing(&self, allowed_lag: u64) -> bool {
-        match self.head_block.read().clone() {
-            None => true,
-            Some(x) => x.syncing(allowed_lag),
-        }
-    }
-
     pub fn has_block_data(&self, needed_block_num: &U64) -> bool {
         let head_block_num = match self.head_block.read().clone() {
             None => return false,
-            Some(x) => {
-                // TODO: this 60 second limit is causing our polygons to fall behind. change this to number of blocks?
-                // TODO: sometimes blocks might actually just take longer than 60 seconds
-                if x.syncing(60) {
-                    // skip syncing nodes. even though they might be able to serve a query,
-                    // latency will be poor and it will get in the way of them syncing further
-                    return false;
-                }
-
-                x.number()
-            }
+            Some(x) => x.number(),
         };
 
         // this rpc doesn't have that block yet. still syncing
@@ -548,7 +510,7 @@ impl Web3Connection {
             let _ = head_block.insert(new_head_block.clone().into());
         }
 
-        if self.block_data_limit() == U64::zero() && !self.syncing(1) {
+        if self.block_data_limit() == U64::zero() {
             let authorization = Arc::new(Authorization::internal(self.db_conn.clone())?);
             if let Err(err) = self.check_block_data_limit(&authorization).await {
                 warn!(
@@ -596,8 +558,6 @@ impl Web3Connection {
         reconnect: bool,
         tx_id_sender: Option<flume::Sender<(TxHash, Arc<Self>)>>,
     ) -> anyhow::Result<()> {
-        let allowed_lag = self.allowed_lag;
-
         loop {
             let http_interval_receiver = http_interval_sender.as_ref().map(|x| x.subscribe());
@@ -629,8 +589,6 @@ impl Web3Connection {
                 let health_sleep_seconds = 10;
                 sleep(Duration::from_secs(health_sleep_seconds)).await;
 
-                let mut warned = 0;
-
                 loop {
                     // TODO: what if we just happened to have this check line up with another restart?
                     // TODO: think more about this
@@ -649,38 +607,6 @@ impl Web3Connection {
                     }
                     // trace!("health check on {}. unlocked", conn);
 
-                    if let Some(x) = &*conn.head_block.read() {
-                        // if this block is too old, return an error so we reconnect
-                        let current_lag = x.lag();
-                        if current_lag > allowed_lag {
-                            let level = if warned == 0 {
-                                if conn.backup {
-                                    log::Level::Info
-                                } else {
-                                    log::Level::Warn
-                                }
-                            } else if warned % 100 == 0 {
-                                log::Level::Debug
-                            } else {
-                                log::Level::Trace
-                            };
-
-                            log::log!(
-                                level,
-                                "{} is lagged {} secs: {} {}",
-                                conn,
-                                current_lag,
-                                x.number(),
-                                x.hash(),
-                            );
-
-                            warned += 1;
-                        } else {
-                            // reset warnings now that we are connected
-                            warned = 0;
-                        }
-                    }
-
                     sleep(Duration::from_secs(health_sleep_seconds)).await;
                 }
             };
@@ -1222,7 +1148,6 @@ mod tests {
         let x = Web3Connection {
             name: "name".to_string(),
-            allowed_lag: 10,
             db_conn: None,
             display_name: None,
             url: "ws://example.com".to_string(),
@@ -1271,7 +1196,6 @@ mod tests {
         // TODO: this is getting long. have a `impl Default`
         let x = Web3Connection {
             name: "name".to_string(),
-            allowed_lag: 10,
             db_conn: None,
             display_name: None,
             url: "ws://example.com".to_string(),
@@ -1299,6 +1223,8 @@ mod tests {
         assert!(!x.has_block_data(&(head_block.number() + 1000)));
     }
 
+    /*
+    // TODO: think about how to bring the concept of a "lagged" node back
     #[test]
     fn test_lagged_node_not_has_block_data() {
         let now: U256 = SystemTime::now()
@@ -1324,7 +1250,6 @@ mod tests {
         let x = Web3Connection {
             name: "name".to_string(),
-            allowed_lag: 10,
             db_conn: None,
             display_name: None,
             url: "ws://example.com".to_string(),
@@ -1349,4 +1274,5 @@ mod tests {
         assert!(!x.has_block_data(&(head_block.number() + 1)));
         assert!(!x.has_block_data(&(head_block.number() + 1000)));
     }
+    */
 }


@@ -89,9 +89,6 @@ impl Web3Connections {
             }
         };
 
-        // TODO: this might be too aggressive. think about this more
-        let allowed_lag = ((expected_block_time_ms * 3) as f64 / 1000.0).round() as u64;
-
         let http_interval_sender = if http_client.is_some() {
             let (sender, receiver) = broadcast::channel(1);
@@ -155,7 +152,6 @@ impl Web3Connections {
                 server_config
                     .spawn(
                         server_name,
-                        allowed_lag,
                         db_conn,
                         redis_pool,
                         chain_id,
@@ -408,10 +404,40 @@ impl Web3Connections {
         unimplemented!("this shouldn't be possible")
     }
 
     /// get the best available rpc server with the consensus head block. it might have blocks after the consensus head
     pub async fn best_consensus_head_connection(
         &self,
-        allowed_lag: u64,
+        authorization: &Arc<Authorization>,
+        request_metadata: Option<&Arc<RequestMetadata>>,
+        skip: &[Arc<Web3Connection>],
+        min_block_needed: Option<&U64>,
+    ) -> anyhow::Result<OpenRequestResult> {
+        if let Ok(without_backups) = self
+            ._best_consensus_head_connection(
+                false,
+                authorization,
+                request_metadata,
+                skip,
+                min_block_needed,
+            )
+            .await
+        {
+            return Ok(without_backups);
+        }
+
+        self._best_consensus_head_connection(
+            true,
+            authorization,
+            request_metadata,
+            skip,
+            min_block_needed,
+        )
+        .await
+    }
+
+    /// get the best available rpc server with the consensus head block. it might have blocks after the consensus head
+    async fn _best_consensus_head_connection(
+        &self,
+        allow_backups: bool,
         authorization: &Arc<Authorization>,
         request_metadata: Option<&Arc<RequestMetadata>>,
         skip: &[Arc<Web3Connection>],
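
This wrapper is the core of the refactor: one code path, run first with backups excluded and retried with them allowed only when the primary-only pass finds nothing. The same shape is repeated below for `all_synced_connections`. A generic sketch of the pattern with hypothetical names; the real methods write the two calls out inline and thread several extra arguments through instead of taking a closure:

/// try `attempt` with backups excluded, then once more with them allowed
async fn with_backup_fallback<T, E, F, Fut>(mut attempt: F) -> Result<T, E>
where
    F: FnMut(bool) -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
{
    // first pass: primary servers only
    if let Ok(found) = attempt(false).await {
        return Ok(found);
    }

    // second pass: backups are now fair game
    attempt(true).await
}

#[tokio::main]
async fn main() {
    let result: Result<&str, &str> = with_backup_fallback(|allow_backups| async move {
        if allow_backups {
            Ok("backup-1")
        } else {
            Err("no primary in consensus")
        }
    })
    .await;

    assert_eq!(result, Ok("backup-1"));
}
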
@@ -421,12 +447,13 @@ impl Web3Connections {
             (Option<U64>, u64),
             Vec<Arc<Web3Connection>>,
         > = if let Some(min_block_needed) = min_block_needed {
-            // need a potentially old block. check all the rpcs
+            // need a potentially old block. check all the rpcs. prefer the most synced
             let mut m = BTreeMap::new();
 
             for x in self
                 .conns
                 .values()
+                .filter(|x| if allow_backups { true } else { !x.backup })
                 .filter(|x| !skip.contains(x))
                 .filter(|x| x.has_block_data(min_block_needed))
                 .cloned()
@@ -448,15 +475,7 @@ impl Web3Connections {
             // need latest. filter the synced rpcs
             let synced_connections = self.synced_connections.load();
 
-            let head_block = match synced_connections.head_block.as_ref() {
-                None => return Ok(OpenRequestResult::NotReady),
-                Some(x) => x,
-            };
-
-            // TODO: self.allowed_lag instead of taking as an arg
-            if head_block.syncing(allowed_lag) {
-                return Ok(OpenRequestResult::NotReady);
-            }
+            // TODO: if head_block is super old. emit an error!
 
             let mut m = BTreeMap::new();
@@ -575,7 +594,7 @@ impl Web3Connections {
             None => {
                 // none of the servers gave us a time to retry at
-                // TODO: bring this back?
+                // TODO: bring this back? need to think about how to do this with `allow_backups`
                 // we could return an error here, but maybe waiting a second will fix the problem
                 // TODO: configurable max wait? the whole max request time, or just some portion?
                 // let handle = sorted_rpcs
@@ -605,6 +624,24 @@ impl Web3Connections {
         authorization: &Arc<Authorization>,
         block_needed: Option<&U64>,
         max_count: Option<usize>,
+    ) -> Result<Vec<OpenRequestHandle>, Option<Instant>> {
+        if let Ok(without_backups) = self
+            ._all_synced_connections(false, authorization, block_needed, max_count)
+            .await
+        {
+            return Ok(without_backups);
+        }
+
+        self._all_synced_connections(true, authorization, block_needed, max_count)
+            .await
+    }
+
+    async fn _all_synced_connections(
+        &self,
+        allow_backups: bool,
+        authorization: &Arc<Authorization>,
+        block_needed: Option<&U64>,
+        max_count: Option<usize>,
     ) -> Result<Vec<OpenRequestHandle>, Option<Instant>> {
         let mut earliest_retry_at = None;
 
         // TODO: with capacity?
@@ -621,12 +658,14 @@ impl Web3Connections {
                 break;
             }
 
+            if !allow_backups && connection.backup {
+                continue;
+            }
+
             if let Some(block_needed) = block_needed {
                 if !connection.has_block_data(block_needed) {
                     continue;
                 }
-            } else if connection.syncing(30) {
-                continue;
             }
 
             // check rate limits and increment our connection counter
@@ -663,10 +702,8 @@ impl Web3Connections {
     }
 
     /// be sure there is a timeout on this or it might loop forever
-    /// TODO: do not take allowed_lag here. have it be on the connections struct instead
     pub async fn try_send_best_consensus_head_connection(
         &self,
-        allowed_lag: u64,
         authorization: &Arc<Authorization>,
         request: JsonRpcRequest,
         request_metadata: Option<&Arc<RequestMetadata>>,
@@ -682,7 +719,6 @@ impl Web3Connections {
             }
             match self
                 .best_consensus_head_connection(
-                    allowed_lag,
                     authorization,
                     request_metadata,
                     &skip_rpcs,
@@ -903,7 +939,6 @@ impl Web3Connections {
     pub async fn try_proxy_connection(
         &self,
         proxy_mode: ProxyMode,
-        allowed_lag: u64,
         authorization: &Arc<Authorization>,
         request: JsonRpcRequest,
         request_metadata: Option<&Arc<RequestMetadata>>,
@@ -912,7 +947,6 @@ impl Web3Connections {
         match proxy_mode {
             ProxyMode::Best => {
                 self.try_send_best_consensus_head_connection(
-                    allowed_lag,
                     authorization,
                     request,
                     request_metadata,
@@ -1014,8 +1048,6 @@ mod tests {
         let head_rpc = Web3Connection {
             name: "synced".to_string(),
-            // TODO: what should this be?
-            allowed_lag: 10,
             db_conn: None,
             display_name: None,
             url: "ws://example.com/synced".to_string(),
@@ -1036,7 +1068,6 @@ mod tests {
         let lagged_rpc = Web3Connection {
             name: "lagged".to_string(),
-            allowed_lag: 10,
             db_conn: None,
             display_name: None,
             url: "ws://example.com/lagged".to_string(),
@@ -1129,9 +1160,8 @@ mod tests {
         );
 
         // best_synced_backend_connection requires servers to be synced with the head block
-        // TODO: don't hard code allowed_lag
         let x = conns
-            .best_consensus_head_connection(60, &authorization, None, &[], None)
+            .best_consensus_head_connection(&authorization, None, &[], None)
             .await
             .unwrap();
@@ -1186,21 +1216,21 @@ mod tests {
         assert!(matches!(
             conns
-                .best_consensus_head_connection(60, &authorization, None, &[], None)
+                .best_consensus_head_connection(&authorization, None, &[], None)
                 .await,
             Ok(OpenRequestResult::Handle(_))
         ));
 
         assert!(matches!(
             conns
-                .best_consensus_head_connection(60, &authorization, None, &[], Some(&0.into()))
+                .best_consensus_head_connection(&authorization, None, &[], Some(&0.into()))
                 .await,
             Ok(OpenRequestResult::Handle(_))
         ));
 
         assert!(matches!(
             conns
-                .best_consensus_head_connection(60, &authorization, None, &[], Some(&1.into()))
+                .best_consensus_head_connection(&authorization, None, &[], Some(&1.into()))
                 .await,
             Ok(OpenRequestResult::Handle(_))
         ));
@@ -1208,7 +1238,7 @@ mod tests {
         // future block should not get a handle
         assert!(matches!(
             conns
-                .best_consensus_head_connection(60, &authorization, None, &[], Some(&2.into()))
+                .best_consensus_head_connection(&authorization, None, &[], Some(&2.into()))
                 .await,
             Ok(OpenRequestResult::NotReady)
         ));
@@ -1241,7 +1271,6 @@ mod tests {
         let pruned_rpc = Web3Connection {
             name: "pruned".to_string(),
-            allowed_lag: 10,
             db_conn: None,
             display_name: None,
             url: "ws://example.com/pruned".to_string(),
@@ -1262,7 +1291,6 @@ mod tests {
         let archive_rpc = Web3Connection {
             name: "archive".to_string(),
-            allowed_lag: 10,
             db_conn: None,
             display_name: None,
             url: "ws://example.com/archive".to_string(),
@@ -1343,13 +1371,7 @@ mod tests {
         // best_synced_backend_connection requires servers to be synced with the head block
         let best_head_server = conns
-            .best_consensus_head_connection(
-                60,
-                &authorization,
-                None,
-                &[],
-                Some(&head_block.number()),
-            )
+            .best_consensus_head_connection(&authorization, None, &[], Some(&head_block.number()))
             .await;
 
         assert!(matches!(
@@ -1358,7 +1380,7 @@ mod tests {
         ));
 
         let best_archive_server = conns
-            .best_consensus_head_connection(60, &authorization, None, &[], Some(&1.into()))
+            .best_consensus_head_connection(&authorization, None, &[], Some(&1.into()))
             .await;
 
         match best_archive_server {