more fork detection work

Bryan Stitt 2022-09-01 05:58:55 +00:00
parent 4bb32e2767
commit 6450a4cd42
9 changed files with 177 additions and 124 deletions

Cargo.lock (generated)

@@ -85,9 +85,9 @@ dependencies = [
 [[package]]
 name = "anyhow"
-version = "1.0.62"
+version = "1.0.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1485d4d2cc45e7b201ee3767015c96faa5904387c9d87c6efdd0fb511f12d305"
+checksum = "a26fa4d7e3f2eebadf743988fc8aec9fa9a9e82611acafd77c1462ed6262440a"
 dependencies = [
  "backtrace",
 ]
@@ -1245,13 +1245,14 @@ dependencies = [
 [[package]]
 name = "dashmap"
-version = "5.3.4"
+version = "5.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3495912c9c1ccf2e18976439f4443f3fee0fd61f424ff99fde6a66b15ecb448f"
+checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc"
 dependencies = [
  "cfg-if",
  "hashbrown",
  "lock_api",
+ "once_cell",
  "parking_lot_core 0.9.3",
 ]
@@ -2671,9 +2672,9 @@ dependencies = [
 [[package]]
 name = "lock_api"
-version = "0.4.7"
+version = "0.4.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53"
+checksum = "9f80bf5aacaf25cbfc8210d1cfb718f2bf3b11c4c54e5afe36c236853a8ec390"
 dependencies = [
  "autocfg 1.1.0",
  "scopeguard",
@@ -2954,9 +2955,9 @@ dependencies = [
 [[package]]
 name = "once_cell"
-version = "1.13.0"
+version = "1.13.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1"
+checksum = "074864da206b4973b84eb91683020dbefd6a8c3f0f38e054d93954e891935e4e"

 [[package]]
 name = "oorandom"

TODO.md

@@ -90,7 +90,8 @@
   - whenever blocks were slow, we started checking as fast as possible
 - [x] create user script should allow setting requests per minute
 - [x] cache api keys that are not in the database
-- [x] improve consensus block selection. Our goal is to find the highest work chain with a block over a minimum threshold of sum_soft_limit.
+- [ ] improve consensus block selection. Our goal is to find the highest work chain with a block over a minimum threshold of sum_soft_limit.
+  - [x] i saw a fork of like 300 blocks. probably just because a node was restarted and had fallen behind. need some checks to ignore things that are far behind. this improvement should fix this problem
 - [x] A new block arrives at a connection.
   - [x] It checks that it isn't the same that it already has (which is a problem with polling nodes)
   - [x] If its new to this node...
@@ -113,7 +114,12 @@
   - if all_simple_paths returns no paths, warn about a chain split?
 - [x] now that we have a consensus head with enough soft limit (or an empty set), update SyncedConnections
 - [x] send the block through new head_block_sender
-- [x] rewrite cannonical_block
+- [x] rewrite cannonical_block to work as long as there are no forks
+- [ ] rewrite cannonical_block (again) and related functions to handle forks
+  - [ ] todo!("do something with the old hash. we need to update a bunch more block numbers")
+  - [ ] todo!("handle equal") and also less and greater
+- [x] "chain is forked" message is wrong. it includes nodes just being on different heights of the same chain. need a smarter check
+  - i think there is also a bug because i've seen "server not synced" a couple times
 - [x] bug around eth_getBlockByHash sometimes causes tokio to lock up
   - i keep a mapping of blocks so that i can go from hash -> block. it has some consistent hashing it does to split them up across multiple maps each with their own lock. so a lot of the time reads dont block writes because they are in different internal maps. this was fine.
   - but after changing my fork detection logic to use the same rules as erigon, i discovered that when you get blocks from a websocket subscription in erigon and geth, theres a missing field (https://github.com/ledgerwatch/erigon/issues/5190). so i added a query to get the block that includes the missing field.
@@ -125,12 +131,9 @@
   - but under heavy load, we hit their rate limits. need a "retry_until_success" function that goes to balanced_rpcs. or maybe store in redis the txids that we broadcast privately and use that to route.
 - [ ] write a function for receipts that tries balanced_rpcs and only on error of all balanced tries privates
 - [-] basic request method stats (using the user_id and other fields that are in the tracing frame)
-- [ ] "chain is forked" message is wrong. it includes nodes just being on different heights of the same chain. need a smarter check
-  - i think there is also a bug because i've seen "server not synced" a couple times
-- [x] i saw a fork of like 300 blocks. probably just because a node was restarted and had fallen behind. need some checks to ignore things that are far behind
-- [ ] todo!("pick the block on the current consensus chain")
 - [ ] web3connection3.block(...) might wait forever. be sure to do it safely
 - [ ] search for all "todo!"
+- [ ] replace all `.context("no servers in sync")` with proper error type

 ## V1
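The consensus-selection items above describe the rule this commit is working toward: start from the highest-work head any backend reports, then walk back through parent blocks until the rpcs that can serve that block add up to min_sum_soft_limit. The sketch below only illustrates that idea under simplified assumptions; the `HeadBlock` struct, the `u64` stand-ins for hashes, the `pick_heavy_head` name, and the "compare block numbers" descendant check are all invented for the example (the real code uses ethers types and tracks ancestry with a petgraph graph).

```rust
use std::collections::HashMap;

// Illustrative stand-ins: the real code uses H256 hashes, U64 numbers, and Arc<Block<TxHash>>.
#[derive(Clone)]
struct HeadBlock {
    hash: u64,
    parent_hash: u64,
    number: u64,
    total_difficulty: u128,
}

/// Walk back from the highest-work head until the soft limits of the rpcs that are
/// at (or past) the candidate block add up to `min_sum_soft_limit`.
fn pick_heavy_head(
    blocks: &HashMap<u64, HeadBlock>,       // hash -> block
    rpc_heads: &HashMap<String, u64>,       // rpc name -> head hash
    rpc_soft_limits: &HashMap<String, u32>, // rpc name -> soft limit
    min_sum_soft_limit: u32,
) -> Option<HeadBlock> {
    // start at the reported head with the highest total difficulty
    let mut candidate = rpc_heads
        .values()
        .filter_map(|hash| blocks.get(hash))
        .max_by_key(|b| b.total_difficulty)?
        .clone();

    loop {
        // sum the soft limits of every rpc whose head is at least as high as the candidate.
        // (the real code checks actual ancestry; comparing numbers is a simplification here.)
        let sum_soft_limit: u32 = rpc_heads
            .iter()
            .filter(|(_, head_hash)| {
                blocks
                    .get(*head_hash)
                    .map_or(false, |b| b.number >= candidate.number)
            })
            .filter_map(|(name, _)| rpc_soft_limits.get(name))
            .sum();

        if sum_soft_limit >= min_sum_soft_limit {
            // enough capacity agrees on (or is ahead of) this block
            return Some(candidate);
        }

        // not enough capacity yet: try the parent block
        candidate = blocks.get(&candidate.parent_hash)?.clone();
    }
}

fn main() {
    // two rpcs on block 2 (hash 0xb), one lagging rpc on block 1 (hash 0xa)
    let blocks = HashMap::from([
        (0xa, HeadBlock { hash: 0xa, parent_hash: 0x0, number: 1, total_difficulty: 10 }),
        (0xb, HeadBlock { hash: 0xb, parent_hash: 0xa, number: 2, total_difficulty: 20 }),
    ]);
    let rpc_heads = HashMap::from([
        ("geth".to_string(), 0xb),
        ("erigon".to_string(), 0xb),
        ("lagging".to_string(), 0xa),
    ]);
    let soft_limits = HashMap::from([
        ("geth".to_string(), 200),
        ("erigon".to_string(), 200),
        ("lagging".to_string(), 200),
    ]);

    // with min_sum_soft_limit = 500, block 2 only has 400 behind it, so the parent wins
    let head = pick_heavy_head(&blocks, &rpc_heads, &soft_limits, 500).unwrap();
    assert_eq!(head.number, 1);
}
```

The committed code still has an open TODO about capping how far back this walk is allowed to go, which is what the "fork of like 300 blocks" item above is about.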


@@ -5,6 +5,6 @@ authors = ["Bryan Stitt <bryan@stitthappens.com>"]
 edition = "2021"

 [dependencies]
-anyhow = "1.0.62"
+anyhow = "1.0.63"
 bb8-redis = "0.11.0"
 tracing = "0.1.36"


@@ -84,9 +84,7 @@ impl RedisRateLimit {
             .await
             .context("increment rate limit")?;

-        let new_count = x
-            .first()
-            .ok_or_else(|| anyhow::anyhow!("check rate limit result"))?;
+        let new_count = x.first().context("check rate limit result")?;

         if new_count > &max_per_period {
             let seconds_left_in_period = self.period - (now % self.period);
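The hunk above swaps an `ok_or_else(|| anyhow::anyhow!(...))` on an `Option` for `anyhow::Context::context`, which attaches the same message in one call. A minimal stand-alone illustration; the `first_count` helper is invented for the example and is not part of the crate:

```rust
use anyhow::Context;

// hypothetical helper: pull the first counter out of a reply, like the rate limiter's pipeline result
fn first_count(reply: Vec<u64>) -> anyhow::Result<u64> {
    // Option::context turns a None into an anyhow::Error carrying this message,
    // replacing the longer .ok_or_else(|| anyhow::anyhow!("check rate limit result"))? form
    let new_count = reply.first().copied().context("check rate limit result")?;

    Ok(new_count)
}

fn main() -> anyhow::Result<()> {
    assert_eq!(first_count(vec![3, 9])?, 3);
    assert!(first_count(vec![]).is_err());

    Ok(())
}
```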


@@ -16,7 +16,7 @@ entities = { path = "../entities" }
 migration = { path = "../migration" }
 redis-rate-limit = { path = "../redis-rate-limit" }

-anyhow = { version = "1.0.62", features = ["backtrace"] }
+anyhow = { version = "1.0.63", features = ["backtrace"] }
 arc-swap = "1.5.1"
 argh = "0.1.8"
 axum = { version = "0.5.15", features = ["headers", "serde_json", "tokio-tungstenite", "ws"] }
@@ -25,7 +25,7 @@ axum-client-ip = "0.2.0"
 axum-macros = "0.2.3"
 # TODO: turn tokio-console on with a feature. console-subscriber = { version = "0.1.7" }
 counter = "0.5.6"
-dashmap = "5.3.4"
+dashmap = "5.4.0"
 derive_more = "0.99.17"
 dotenv = "0.15.0"
 ethers = { version = "0.17.0", features = ["rustls", "ws"] }


@@ -597,7 +597,9 @@ impl Web3ProxyApp {
             self.balanced_rpcs.block_hash(min_block_needed).await?
         } else {
             // TODO: maybe this should be on the app and not on balanced_rpcs
-            self.balanced_rpcs.head_block_hash()
+            self.balanced_rpcs
+                .head_block_hash()
+                .context("no servers in sync")?
         };

         // TODO: better key? benchmark this
@@ -724,16 +726,15 @@ impl Web3ProxyApp {
                     serde_json::Value::Array(vec![])
                 }
                 "eth_blockNumber" => {
-                    // TODO: emit stats
-
-                    let head_block_number = self.balanced_rpcs.head_block_num();
-
-                    // TODO: technically, block 0 is okay. i guess we should be using an option
-                    if head_block_number.as_u64() == 0 {
-                        return Err(anyhow::anyhow!("no servers synced"));
-                    }
-
-                    json!(head_block_number)
+                    match self.balanced_rpcs.head_block_num() {
+                        Some(head_block_num) => {
+                            json!(head_block_num)
+                        }
+                        None => {
+                            // TODO: what does geth do if this happens?
+                            return Err(anyhow::anyhow!("no servers synced"));
+                        }
+                    }
                 }
                 // TODO: eth_callBundle (https://docs.flashbots.net/flashbots-auction/searchers/advanced/rpc-endpoint#eth_callbundle)
                 // TODO: eth_cancelPrivateTransaction (https://docs.flashbots.net/flashbots-auction/searchers/advanced/rpc-endpoint#eth_cancelprivatetransaction, but maybe just reject)
@@ -807,7 +808,10 @@ impl Web3ProxyApp {
             method => {
                 // emit stats

-                let head_block_number = self.balanced_rpcs.head_block_num();
+                let head_block_number = self
+                    .balanced_rpcs
+                    .head_block_num()
+                    .context("no servers synced")?;

                 // we do this check before checking caches because it might modify the request params
                 // TODO: add a stat for archive vs full since they should probably cost different
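The `eth_blockNumber` hunk above stops treating block 0 as a "not synced" sentinel and instead matches on the `Option` that `head_block_num` now returns. A stand-alone sketch of that pattern; the `head_block_num` stub here is invented for the example rather than being the app's real method:

```rust
use anyhow::anyhow;
use serde_json::{json, Value};

// stand-in for self.balanced_rpcs.head_block_num(), which now returns an Option
fn head_block_num(synced: bool) -> Option<u64> {
    synced.then_some(15_000_000)
}

fn eth_block_number(synced: bool) -> anyhow::Result<Value> {
    match head_block_num(synced) {
        // Some(0) is a legitimate answer now (a chain at genesis), so zero is no longer special-cased
        Some(num) => Ok(json!(num)),
        // None means no backend server is synced, which is an error for the caller
        None => Err(anyhow!("no servers synced")),
    }
}

fn main() -> anyhow::Result<()> {
    assert_eq!(eth_block_number(true)?, json!(15_000_000u64));
    assert!(eth_block_number(false).is_err());

    Ok(())
}
```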


@@ -5,6 +5,7 @@ use super::transactions::TxStatus;
 use crate::{
     config::BlockAndRpc, jsonrpc::JsonRpcRequest, rpcs::synced_connections::SyncedConnections,
 };
+use anyhow::Context;
 use dashmap::{
     mapref::{entry::Entry, one::Ref},
     DashMap,
@@ -13,8 +14,9 @@ use derive_more::From;
 use ethers::prelude::{Block, TxHash, H256, U64};
 use hashbrown::{HashMap, HashSet};
 use petgraph::algo::all_simple_paths;
+use serde::Serialize;
 use serde_json::json;
-use std::sync::Arc;
+use std::{cmp::Ordering, fmt::Display, sync::Arc};
 use tokio::sync::{broadcast, watch};
 use tracing::{debug, info, trace, warn};
@@ -23,27 +25,27 @@ pub type ArcBlock = Arc<Block<TxHash>>;
 pub type BlockHashesMap = Arc<DashMap<H256, ArcBlock>>;

 /// A block's hash and number.
-#[derive(Default, From)]
+#[derive(Clone, Debug, Default, From, Serialize)]
 pub struct BlockId {
     pub(super) hash: H256,
     pub(super) num: U64,
 }

+impl Display for BlockId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} ({})", self.num, self.hash)
+    }
+}
+
 impl Web3Connections {
     /// add a block to our map and it's hash to our graphmap of the blockchain
     pub fn save_block(&self, block: &ArcBlock) -> anyhow::Result<()> {
-        let block_hash = block
-            .hash
-            .as_ref()
-            .ok_or_else(|| anyhow::anyhow!("no block hash"))?;
-        let block_num = block
-            .number
-            .as_ref()
-            .ok_or_else(|| anyhow::anyhow!("no block num"))?;
+        let block_hash = block.hash.as_ref().context("no block hash")?;
+        let block_num = block.number.as_ref().context("no block num")?;
         let _block_td = block
             .total_difficulty
             .as_ref()
-            .ok_or_else(|| anyhow::anyhow!("no block total difficulty"))?;
+            .context("no block total difficulty")?;

         if self.block_hashes.contains_key(block_hash) {
             // this block is already included. no need to continue
@@ -68,10 +70,14 @@ impl Web3Connections {

         match self.block_numbers.entry(*block_num) {
             Entry::Occupied(mut x) => {
-                x.get_mut().push(*block_hash);
+                let old = x.insert(*block_hash);
+
+                todo!(
+                    "do something with the old hash. we need to update a bunch more block numbers"
+                )
             }
             Entry::Vacant(x) => {
-                x.insert(vec![*block_hash]);
+                x.insert(*block_hash);
             }
         }
@@ -157,30 +163,20 @@ impl Web3Connections {
         // first, try to get the hash from our cache
         if let Some(block_hash) = self.block_numbers.get(num) {
-            match block_hash.len() {
-                0 => {
-                    unimplemented!("block_numbers is broken")
-                }
-                1 => {
-                    let block_hash = block_hash.get(0).expect("length was checked");
-
-                    let block = self
-                        .block_hashes
-                        .get(block_hash)
-                        .expect("block_numbers gave us this hash");
-
-                    return Ok(block.clone());
-                }
-                _ => {
-                    // TODO: maybe the vec should be sorted by total difficulty.
-                    todo!("pick the block on the current consensus chain")
-                }
-            }
+            let block = self
+                .block_hashes
+                .get(&block_hash)
+                .expect("block_numbers gave us this hash");
+
+            return Ok(block.clone());
         }

         // block not in cache. we need to ask an rpc for it
         // but before we do any queries, be sure the requested block num exists
-        let head_block_num = self.head_block_num();
+        let head_block_num = self
+            .head_block_num()
+            .ok_or_else(|| anyhow::anyhow!("no servers in sync"))?;

         if num > &head_block_num {
             // TODO: i'm seeing this a lot when using ethspam. i dont know why though. i thought we delayed publishing
             // TODO: instead of error, maybe just sleep and try again?
@@ -252,16 +248,23 @@ impl Web3Connections {
         pending_tx_sender: &Option<broadcast::Sender<TxStatus>>,
     ) -> anyhow::Result<()> {
         // add the block to connection_heads
-        match (rpc_head_block.hash, rpc_head_block.number) {
+        let rpc_block_id = match (rpc_head_block.hash, rpc_head_block.number) {
             (Some(rpc_head_hash), Some(rpc_head_num)) => {
                 if rpc_head_num == U64::zero() {
                     debug!(%rpc, "still syncing");

                     connection_heads.remove(&rpc.name);
+
+                    None
                 } else {
                     connection_heads.insert(rpc.name.to_owned(), rpc_head_hash);

                     self.save_block(&rpc_head_block)?;
+
+                    Some(BlockId {
+                        hash: rpc_head_hash,
+                        num: rpc_head_num,
+                    })
                 }
             }
             _ => {
@@ -270,14 +273,15 @@ impl Web3Connections {
                 connection_heads.remove(&rpc.name);

                 // don't return yet! self.synced_connections likely needs an update
+
+                None
             }
-        }
+        };

         // iterate the rpc_map to find the highest_work_block
         let mut checked_heads = HashSet::new();
         let mut highest_work_block: Option<Ref<H256, ArcBlock>> = None;

-        for (_rpc_name, rpc_head_hash) in connection_heads.iter() {
+        for rpc_head_hash in connection_heads.values() {
             if checked_heads.contains(rpc_head_hash) {
                 continue;
             }
@@ -318,11 +322,11 @@ impl Web3Connections {
         };

         // track names so we don't check the same node multiple times
-        let mut consensus_names: HashSet<&String> = HashSet::new();
+        let mut heavy_names: HashSet<&String> = HashSet::new();
         // track rpcs so we can build a new SyncedConnections
-        let mut consensus_rpcs: Vec<&Arc<Web3Connection>> = vec![];
+        let mut heavy_rpcs: Vec<&Arc<Web3Connection>> = vec![];
         // a running total of the soft limits covered by the rpcs
-        let mut consensus_sum_soft_limit: u32 = 0;
+        let mut heavy_sum_soft_limit: u32 = 0;

         // check the highest work block and its parents for a set of rpcs that can serve our request load
         // TODO: loop for how many parent blocks? we don't want to serve blocks that are too far behind
@@ -331,7 +335,7 @@ impl Web3Connections {
             let highest_work_hash = highest_work_block.hash.as_ref().unwrap();

             for (rpc_name, rpc_head_hash) in connection_heads.iter() {
-                if consensus_names.contains(rpc_name) {
+                if heavy_names.contains(rpc_name) {
                     // this block is already included
                     continue;
                 }
@@ -339,9 +343,9 @@ impl Web3Connections {
                 // TODO: does all_simple_paths make this check?
                 if rpc_head_hash == highest_work_hash {
                     if let Some(rpc) = self.conns.get(rpc_name) {
-                        consensus_names.insert(rpc_name);
-                        consensus_rpcs.push(rpc);
-                        consensus_sum_soft_limit += rpc.soft_limit;
+                        heavy_names.insert(rpc_name);
+                        heavy_rpcs.push(rpc);
+                        heavy_sum_soft_limit += rpc.soft_limit;
                     }
                     continue;
                 }
@@ -362,21 +366,21 @@ impl Web3Connections {

                 if is_connected {
                     if let Some(rpc) = self.conns.get(rpc_name) {
-                        consensus_rpcs.push(rpc);
-                        consensus_sum_soft_limit += rpc.soft_limit;
+                        heavy_rpcs.push(rpc);
+                        heavy_sum_soft_limit += rpc.soft_limit;
                     }
                 }
             }

             // TODO: min_sum_soft_limit as a percentage of total_soft_limit?
             // let min_sum_soft_limit = total_soft_limit / self.min_sum_soft_limit;
-            if consensus_sum_soft_limit >= self.min_sum_soft_limit {
+            if heavy_sum_soft_limit >= self.min_sum_soft_limit {
                 // success! this block has enough nodes on it
                 break;
             }

             // else, we need to try the parent block
-            trace!(%consensus_sum_soft_limit, ?highest_work_hash, "avoiding thundering herd");
+            trace!(%heavy_sum_soft_limit, ?highest_work_hash, "avoiding thundering herd");

             // // TODO: this automatically queries for parents, but need to rearrange lifetimes to make an await work here
             // highest_work_block = self
@@ -389,9 +393,9 @@ impl Web3Connections {
                 None => {
                     warn!(
                         "ran out of parents to check. soft limit only {}/{}: {}%",
-                        consensus_sum_soft_limit,
+                        heavy_sum_soft_limit,
                         self.min_sum_soft_limit,
-                        consensus_sum_soft_limit * 100 / self.min_sum_soft_limit
+                        heavy_sum_soft_limit * 100 / self.min_sum_soft_limit
                     );
                     break;
                 }
@@ -403,29 +407,34 @@ impl Web3Connections {
         // unlock self.blockchain_graphmap
         drop(blockchain_guard);

-        let soft_limit_met = consensus_sum_soft_limit >= self.min_sum_soft_limit;
-        let num_synced_rpcs = consensus_rpcs.len() as u32;
+        let soft_limit_met = heavy_sum_soft_limit >= self.min_sum_soft_limit;
+        let num_synced_rpcs = heavy_rpcs.len() as u32;

         let new_synced_connections = if soft_limit_met {
-            // we have a consensus large enough to serve traffic
+            // we have a heavy large enough to serve traffic
             let head_block_hash = highest_work_block.hash.unwrap();
             let head_block_num = highest_work_block.number.unwrap();

             if num_synced_rpcs < self.min_synced_rpcs {
-                trace!(hash=%head_block_hash, num=?head_block_num, "not enough rpcs are synced to advance");
+                // TODO: warn is too loud. if we are first starting, this is expected to happen
+                warn!(hash=%head_block_hash, num=?head_block_num, "not enough rpcs are synced to advance");

-                return Ok(());
+                SyncedConnections::default()
             } else {
                 // TODO: wait until at least most of the rpcs have given their initial block?
                 // otherwise, if there is a syncing node that is fast, our first head block might not be good
                 // TODO: have a configurable "minimum rpcs" number that we can set
                 // TODO: sort by weight and soft limit? do we need an IndexSet, or is a Vec fine?
-                let conns = consensus_rpcs.into_iter().cloned().collect();
+                let conns = heavy_rpcs.into_iter().cloned().collect();
+
+                let head_block_id = BlockId {
+                    hash: head_block_hash,
+                    num: head_block_num,
+                };

                 SyncedConnections {
-                    head_block_num,
-                    head_block_hash,
+                    head_block_id: Some(head_block_id),
                     conns,
                 }
             }
@@ -438,36 +447,71 @@ impl Web3Connections {
             SyncedConnections::default()
         };

-        let consensus_block_hash = new_synced_connections.head_block_hash;
-        let consensus_block_num = new_synced_connections.head_block_num;
+        let heavy_block_id = new_synced_connections.head_block_id.clone();

         let new_synced_connections = Arc::new(new_synced_connections);

         let num_connection_heads = connection_heads.len();
-        let total_rpcs = self.conns.len();
+        let total_conns = self.conns.len();

         let old_synced_connections = self.synced_connections.swap(new_synced_connections);
-        let old_head_hash = old_synced_connections.head_block_hash;

-        if rpc_head_block.hash.is_some() && Some(consensus_block_hash) != rpc_head_block.hash {
-            info!(new=%rpc_head_block.hash.unwrap(), new_num=?rpc_head_block.number.unwrap(), consensus=%consensus_block_hash, num=%consensus_block_num, %rpc, "non consensus head");
-            // TODO: anything else to do? maybe warn if these blocks are very far apart or forked for an extended period of time
-            // TODO: if there is any non-consensus head log how many nodes are on it
+        match (&old_synced_connections.head_block_id, &heavy_block_id) {
+            (None, None) => warn!("no servers synced"),
+            (None, Some(heavy_block_id)) => {
+                debug!(block=%heavy_block_id, %rpc, "first consensus head");
+            }
+            (Some(_), None) => warn!("no longer synced!"),
+            (Some(old_block_id), Some(heavy_block_id)) => {
+                match heavy_block_id.num.cmp(&old_block_id.num) {
+                    Ordering::Equal => {
+                        todo!("handle equal")
+                    }
+                    Ordering::Less => {
+                        todo!("handle less")
+                    }
+                    Ordering::Greater => {
+                        todo!("handle greater")
+                    }
+                }
+            }
         }

+        /*
+        if old_synced_connections.head_block_id.is_none() && rpc_head_block.hash.is_some() {
+            // this is fine. we have our first hash
+        } else if rpc_head_block.hash.is_some()
+            && old_synced_connections.head_block_id.is_some()
+            && old_synced_connections
+                .head_block_id
+                .as_ref()
+                .map_ok(|x| x.num)
+                != rpc_head_block.hash
+        {
+            info!(new=%rpc_head_block.hash.unwrap(), new_num=?rpc_head_block.number.unwrap(), heavy=?heavy_block_id, %rpc, "non heavy head");
+            // TODO: anything else to do? maybe warn if these blocks are very far apart or forked for an extended period of time
+            // TODO: if there is any non-heavy head log how many nodes are on it
+        } */
+
-        if consensus_block_hash == old_head_hash {
-            debug!(hash=%consensus_block_hash, num=%consensus_block_num, limit=%consensus_sum_soft_limit, %rpc, "cur consensus head {}/{}/{}", num_synced_rpcs, num_connection_heads, total_rpcs);
+        /*
+        if heavy_block_num == U64::zero {
+            warn!(?soft_limit_met, %heavy_block_hash, %old_head_hash, %rpc, "NO heavy head {}/{}/{}", num_synced_rpcs, num_connection_heads, total_rpcs)
+        } else if heavy_block_hash == old_head_hash {
+            debug!(hash=%heavy_block_hash, num=%heavy_block_num, limit=%heavy_sum_soft_limit, %rpc, "cur heavy head {}/{}/{}", num_synced_rpcs, num_connection_heads, total_rpcs);
         } else if soft_limit_met {
             // TODO: if new's parent is not old, warn?
-            debug!(hash=%consensus_block_hash, num=%consensus_block_num, limit=%consensus_sum_soft_limit, %rpc, "NEW consensus head {}/{}/{}", num_synced_rpcs, num_connection_heads, total_rpcs);
+            debug!(hash=%heavy_block_hash, num=%heavy_block_num, limit=%heavy_sum_soft_limit, %rpc, "NEW heavy head {}/{}/{}", num_synced_rpcs, num_connection_heads, total_rpcs);

             // the head hash changed. forward to any subscribers
             head_block_sender.send(highest_work_block)?;

             // TODO: do something with pending_tx_sender
         } else {
-            warn!(?soft_limit_met, %consensus_block_hash, %old_head_hash, %rpc, "NO consensus head {}/{}/{}", num_synced_rpcs, num_connection_heads, total_rpcs)
+            // TODO: i don't think we can get here
+            warn!(?soft_limit_met, %heavy_block_id, %old_head_hash, %rpc, "NO heavy head {}/{}/{}", num_synced_rpcs, num_connection_heads, total_rpcs)
         }
+        */
+
+        // TODO: the head hash changed. forward to any subscribers
+        head_block_sender.send(highest_work_block)?;

         Ok(())
     }
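The new `match` on the old and new `head_block_id` above leaves all three `Ordering` arms as `todo!()`, and the TODO list calls that out. Purely as a hedged guess at where those arms are headed (this is not what the commit ships), they would probably distinguish an unchanged head, a fork at the same height, a reorg backwards, and a normal advance:

```rust
use std::cmp::Ordering;
use tracing::{debug, info, warn};

// illustrative stand-in for the crate's BlockId, which wraps an H256 hash and a U64 number
#[derive(Debug)]
struct BlockId {
    num: u64,
    hash: u64,
}

// hypothetical contents for the todo!() arms; only a sketch, not the committed code
fn log_head_transition(old: &BlockId, new: &BlockId) {
    match new.num.cmp(&old.num) {
        Ordering::Equal if new.hash == old.hash => {
            // same head we already had; nothing changed
            debug!(?new, "consensus head unchanged");
        }
        Ordering::Equal => {
            // same height but a different hash: a fork at the tip
            warn!(?old, ?new, "consensus head replaced at the same height");
        }
        Ordering::Less => {
            // the new consensus head is behind the old one: a reorg or lost capacity
            warn!(?old, ?new, "consensus head moved backwards");
        }
        Ordering::Greater => {
            // the common case: the chain advanced
            info!(?new, "new consensus head");
        }
    }
}

fn main() {
    // without a tracing subscriber installed these events are simply dropped;
    // the point here is only the shape of the match
    let old = BlockId { num: 100, hash: 0xaaa };
    log_head_transition(&old, &BlockId { num: 100, hash: 0xbbb }); // fork at the tip
    log_head_transition(&old, &BlockId { num: 101, hash: 0xccc }); // normal advance
}
```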


@@ -34,14 +34,15 @@ use tracing::{error, info, instrument, trace, warn};
 /// A collection of web3 connections. Sends requests either the current best server or all servers.
 #[derive(From)]
 pub struct Web3Connections {
-    pub(super) conns: HashMap<String, Arc<Web3Connection>>,
+    pub(super) inner: HashMap<String, Arc<Web3Connection>>,
     /// any requests will be forwarded to one (or more) of these connections
     pub(super) synced_connections: ArcSwap<SyncedConnections>,
     pub(super) pending_transactions: Arc<DashMap<TxHash, TxStatus>>,
     /// TODO: this map is going to grow forever unless we do some sort of pruning. maybe store pruned in redis?
     /// all blocks, including orphans
     pub(super) block_hashes: BlockHashesMap,
-    pub(super) block_numbers: DashMap<U64, Vec<H256>>,
+    /// blocks on the heaviest chain
+    pub(super) block_numbers: DashMap<U64, H256>,
     /// TODO: this map is going to grow forever unless we do some sort of pruning. maybe store pruned in redis?
     /// TODO: what should we use for edges?
     pub(super) blockchain_graphmap: RwLock<DiGraphMap<H256, u32>>,
@@ -168,7 +169,7 @@ impl Web3Connections {
         let synced_connections = SyncedConnections::default();

         let connections = Arc::new(Self {
-            conns: connections,
+            inner: connections,
             synced_connections: ArcSwap::new(Arc::new(synced_connections)),
             pending_transactions,
             block_hashes: Default::default(),
@@ -198,7 +199,7 @@ impl Web3Connections {
     }

     pub fn get(&self, conn_name: &str) -> Option<&Arc<Web3Connection>> {
-        self.conns.get(conn_name)
+        self.inner.get(conn_name)
     }

     /// subscribe to blocks and transactions from all the backend rpcs.
@@ -349,7 +350,7 @@ impl Web3Connections {
         // TODO: we are going to be checking "has_block_data" a lot now. i think we pretty much always have min_block_needed now that we override "latest"
         let mut synced_rpcs: Vec<Arc<Web3Connection>> =
             if let Some(min_block_needed) = min_block_needed {
-                self.conns
+                self.inner
                     .values()
                     .filter(|x| !skip.contains(x))
                     .filter(|x| x.has_block_data(min_block_needed))
@@ -435,7 +436,7 @@ impl Web3Connections {
         // TODO: with capacity?
         let mut selected_rpcs = vec![];

-        for connection in self.conns.values() {
+        for connection in self.inner.values() {
             if let Some(min_block_needed) = min_block_needed {
                 if !connection.has_block_data(min_block_needed) {
                     continue;
@@ -476,7 +477,7 @@ impl Web3Connections {
         // TODO: maximum retries?
         loop {
-            if skip_rpcs.len() == self.conns.len() {
+            if skip_rpcs.len() == self.inner.len() {
                 break;
             }

             match self
@@ -623,7 +624,7 @@ impl fmt::Debug for Web3Connections {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         // TODO: the default formatter takes forever to write. this is too quiet though
         f.debug_struct("Web3Connections")
-            .field("conns", &self.conns)
+            .field("conns", &self.inner)
             .finish_non_exhaustive()
     }
 }
@@ -633,7 +634,7 @@ impl Serialize for Web3Connections {
     where
         S: Serializer,
     {
-        let conns: Vec<&Web3Connection> = self.conns.iter().map(|x| x.1.as_ref()).collect();
+        let conns: Vec<&Web3Connection> = self.inner.iter().map(|x| x.1.as_ref()).collect();

         let mut state = serializer.serialize_struct("Web3Connections", 2)?;
         state.serialize_field("conns", &conns)?;
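`block_numbers` now maps each height to a single hash ("blocks on the heaviest chain") instead of a `Vec<H256>`, which is why `save_block`'s `Entry::Occupied` arm earlier in this commit is left as a `todo!`: replacing one height's hash during a fork means every height above it may need rewriting too. The sketch below is only an assumption about what that bookkeeping could look like, using plain `HashMap` stand-ins instead of the crate's `DashMap` and ethers types:

```rust
use std::collections::HashMap;

// stand-ins for H256 hashes and U64 block numbers
type Hash = u64;
type Num = u64;

struct Block {
    hash: Hash,
    parent_hash: Hash,
    number: Num,
}

/// If a new block displaces the hash stored at its height, walk its ancestry and
/// rewrite the height -> hash index until it agrees with the new chain again.
/// Hypothetical; the commit leaves this as todo!("do something with the old hash ...").
fn reindex_canonical(
    block_numbers: &mut HashMap<Num, Hash>,
    block_hashes: &HashMap<Hash, Block>,
    new_head: &Block,
) {
    let mut cursor = new_head;

    loop {
        match block_numbers.insert(cursor.number, cursor.hash) {
            // the index already pointed at this block, so everything below it is consistent
            Some(old_hash) if old_hash == cursor.hash => break,
            // either a fork (a different hash was stored) or a brand-new height: keep walking back
            _ => match block_hashes.get(&cursor.parent_hash) {
                Some(parent) => cursor = parent,
                None => break, // parent not cached; stop here
            },
        }
    }
}

fn main() {
    // canonical chain 1a <- 2a, then a heavier fork arrives: 1a <- 2b <- 3b
    let block_hashes = HashMap::from([
        (0x1a, Block { hash: 0x1a, parent_hash: 0x00, number: 1 }),
        (0x2a, Block { hash: 0x2a, parent_hash: 0x1a, number: 2 }),
        (0x2b, Block { hash: 0x2b, parent_hash: 0x1a, number: 2 }),
        (0x3b, Block { hash: 0x3b, parent_hash: 0x2b, number: 3 }),
    ]);

    let mut block_numbers = HashMap::from([(1, 0x1a), (2, 0x2a)]);

    reindex_canonical(&mut block_numbers, &block_hashes, &block_hashes[&0x3b]);

    assert_eq!(block_numbers[&2], 0x2b); // height 2 was rewritten to the fork
    assert_eq!(block_numbers[&1], 0x1a); // height 1 was already consistent
}
```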


@@ -1,3 +1,4 @@
+use super::blockchain::BlockId;
 use super::connection::Web3Connection;
 use super::connections::Web3Connections;
 use ethers::prelude::{H256, U64};
@@ -11,8 +12,7 @@ use std::sync::Arc;
 #[derive(Clone, Default, Serialize)]
 pub struct SyncedConnections {
     // TODO: store ArcBlock instead?
-    pub(super) head_block_num: U64,
-    pub(super) head_block_hash: H256,
+    pub(super) head_block_id: Option<BlockId>,
     // TODO: this should be able to serialize, but it isn't
     #[serde(skip_serializing)]
     pub(super) conns: IndexSet<Arc<Web3Connection>>,
@@ -23,29 +23,31 @@ impl fmt::Debug for SyncedConnections {
         // TODO: the default formatter takes forever to write. this is too quiet though
         // TODO: print the actual conns?
         f.debug_struct("SyncedConnections")
-            .field("head_num", &self.head_block_num)
-            .field("head_hash", &self.head_block_hash)
+            .field("head_block_id", &self.head_block_id)
             .field("num_conns", &self.conns.len())
             .finish_non_exhaustive()
     }
 }

 impl Web3Connections {
-    pub fn head_block(&self) -> (U64, H256) {
-        let synced_connections = self.synced_connections.load();
-
-        (
-            synced_connections.head_block_num,
-            synced_connections.head_block_hash,
-        )
+    pub fn head_block_id(&self) -> Option<BlockId> {
+        self.synced_connections.load().head_block_id.clone()
     }

-    pub fn head_block_hash(&self) -> H256 {
-        self.synced_connections.load().head_block_hash
+    pub fn head_block_hash(&self) -> Option<H256> {
+        self.synced_connections
+            .load()
+            .head_block_id
+            .as_ref()
+            .map(|head_block_id| head_block_id.hash)
     }

-    pub fn head_block_num(&self) -> U64 {
-        self.synced_connections.load().head_block_num
+    pub fn head_block_num(&self) -> Option<U64> {
+        self.synced_connections
+            .load()
+            .head_block_id
+            .as_ref()
+            .map(|head_block_id| head_block_id.num)
     }

     pub fn synced(&self) -> bool {