clones to avoid deadlock
parent f010166ee0
commit 33f7256236
@@ -453,8 +453,9 @@ impl Web3Connections {
                     maybe_head_block = parent_block;
                     continue;
                 } else {
+                    // TODO: this message
                     warn!(
-                        "no parent to check. soft limit only {}/{} from {}/{} rpcs: {}%",
+                        "soft limit {}/{} from {}/{} rpcs: {}%",
                         highest_rpcs_sum_soft_limit,
                         self.min_sum_soft_limit,
                         highest_rpcs.len(),
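For context on this first hunk: the consensus loop above it walks back through parent blocks until the RPCs that agree on a candidate head carry enough combined soft limit (request-rate capacity); when no parent is left to check, it gives up with this warning. A hedged sketch of the capacity arithmetic behind the `{}/{} ... {}%` message; the free function and sample numbers are illustrative, not the repo's code:

```rust
// Sketch only: sum the soft limits of the RPCs agreeing on a head block and
// compare them with the configured minimum, as the warn! above reports.
fn consensus_capacity(agreeing_soft_limits: &[u32], min_sum_soft_limit: u32) -> (u32, u32) {
    let sum: u32 = agreeing_soft_limits.iter().sum();
    let percent = 100 * sum / min_sum_soft_limit;
    (sum, percent)
}

fn main() {
    let min_sum_soft_limit = 400;
    let (sum, percent) = consensus_capacity(&[100, 200], min_sum_soft_limit);
    if sum < min_sum_soft_limit {
        // mirrors the shape of the warning in the hunk above
        eprintln!("soft limit {}/{}: {}%", sum, min_sum_soft_limit, percent);
    }
}
```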
@@ -27,7 +27,7 @@ use thread_fast_rng::thread_fast_rng;
 use tokio::sync::{broadcast, oneshot, RwLock as AsyncRwLock};
 use tokio::time::{interval, sleep, sleep_until, timeout, Duration, Instant, MissedTickBehavior};

-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub enum ProviderState {
     None,
     NotReady(Arc<Web3Provider>),
@@ -365,8 +365,6 @@ impl Web3Connection {
             sleep(retry_in).await;
         }

-        info!("connected to {}", self);
-
         Ok(())
     }

@@ -377,9 +375,12 @@ impl Web3Connection {
         chain_id: u64,
         db_conn: Option<&DatabaseConnection>,
     ) -> anyhow::Result<()> {
-        trace!("provider_state {} locking...", self);
-        let mut provider_state = self.provider_state.write().await;
-        trace!("provider_state {} locked: {:?}", self, provider_state);
+        // trace!("provider_state {} locking...", self);
+        let mut provider_state = self
+            .provider_state
+            .try_write()
+            .context("locking provider for write")?;
+        // trace!("provider_state {} locked: {:?}", self, provider_state);

         match &*provider_state {
             ProviderState::None => {
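This hunk is the core of the fix. `RwLock::write().await` parks the task until every other guard is released; if anything on the current call path already holds a guard on `provider_state`, the reconnect never wakes up. `try_write()` fails immediately instead, and anyhow's `.context(...)` turns that into an error the caller can handle. A minimal runnable sketch of the difference, using a stand-in state type rather than the real `ProviderState`:

```rust
// Assumes `tokio` (rt, macros, sync) and `anyhow` in Cargo.toml.
use anyhow::Context;
use std::sync::Arc;
use tokio::sync::RwLock;

// Stand-in for ProviderState; just enough to show the locking pattern.
#[derive(Debug, Default)]
struct State {
    ready: bool,
}

// `write().await` would wait forever if this task (or a task it is waiting
// on) already holds a guard. `try_write()` surfaces that as an error instead.
async fn reconnect(state: &Arc<RwLock<State>>) -> anyhow::Result<()> {
    let mut guard = state.try_write().context("locking state for write")?;
    guard.ready = false;
    Ok(())
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let state = Arc::new(RwLock::new(State::default()));
    reconnect(&state).await?;

    // While a read guard is alive, try_write fails fast instead of hanging.
    let read_guard = state.read().await;
    assert!(state.try_write().is_err());
    drop(read_guard);
    Ok(())
}
```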
@@ -391,18 +392,18 @@ impl Web3Connection {
             return Ok(());
         }

-        trace!("Reconnecting to {}", self);
+        debug!("reconnecting to {}", self);

         // disconnect the current provider
         *provider_state = ProviderState::None;

         // reset sync status
-        trace!("locking head block on {}", self);
+        // trace!("locking head block on {}", self);
         {
             let mut head_block = self.head_block.write();
             *head_block = None;
         }
-        trace!("done with head block on {}", self);
+        // trace!("done with head block on {}", self);

         // tell the block subscriber that we don't have any blocks
         if let Some(block_sender) = block_sender {
@@ -414,15 +415,15 @@ impl Web3Connection {
             }
         }

-        trace!("Creating new Web3Provider on {}", self);
+        // trace!("Creating new Web3Provider on {}", self);
         // TODO: if this fails, keep retrying! otherwise it crashes and doesn't try again!
         let new_provider = Web3Provider::from_str(&self.url, self.http_client.clone()).await?;

-        // TODO: if an error happens,
+        // trace!("saving provider state as NotReady on {}", self);
         *provider_state = ProviderState::NotReady(Arc::new(new_provider));

         // drop the lock so that we can get a request handle
-        trace!("provider_state {} unlocked", self);
+        // trace!("provider_state {} unlocked", self);
         drop(provider_state);

         let authorization = Arc::new(Authorization::internal(db_conn.cloned())?);
@@ -430,7 +431,7 @@ impl Web3Connection {
         // check the server's chain_id here
         // TODO: some public rpcs (on bsc and fantom) do not return an id and so this ends up being an error
         // TODO: what should the timeout be? should there be a request timeout?
-        trace!("waiting on chain id for {}", self);
+        // trace!("waiting on chain id for {}", self);
         let found_chain_id: Result<U64, _> = self
             .wait_for_request_handle(&authorization, Duration::from_secs(30), true)
             .await?
@@ -440,8 +441,7 @@ impl Web3Connection {
                 Level::Trace.into(),
             )
             .await;
-
-        trace!("found_chain_id: {:?}", found_chain_id);
+        // trace!("found_chain_id: {:?}", found_chain_id);

         match found_chain_id {
             Ok(found_chain_id) => {
@@ -468,7 +468,9 @@ impl Web3Connection {
         self.check_block_data_limit(&authorization).await?;

+        {
+            // trace!("locking for ready...");
             let mut provider_state = self.provider_state.write().await;
             // trace!("locked for ready...");

             // TODO: do this without a clone
             let ready_provider = provider_state
@@ -478,6 +480,7 @@ impl Web3Connection {
                 .clone();

             *provider_state = ProviderState::Ready(ready_provider);
             // trace!("unlocked for ready...");
+        }

         info!("successfully connected to {}", self);
@@ -595,13 +598,16 @@ impl Web3Connection {

         // wait before doing the initial health check
         // TODO: how often?
+        // TODO: subscribe to self.head_block
         let health_sleep_seconds = 10;
         sleep(Duration::from_secs(health_sleep_seconds)).await;

+        let mut warned = 0;
+
         loop {
             // TODO: what if we just happened to have this check line up with another restart?
             // TODO: think more about this
-            trace!("health check on {}", conn);
+            // trace!("health check on {}. locking...", conn);
             if conn
                 .provider_state
                 .read()
@@ -610,15 +616,37 @@ impl Web3Connection {
                 .await
                 .is_none()
             {
+                // trace!("health check unlocked with error on {}", conn);
                 // returning error will trigger a reconnect
                 return Err(anyhow::anyhow!("{} is not ready", conn));
             }
+            // trace!("health check on {}. unlocked", conn);

             if let Some(x) = &*conn.head_block.read() {
                 // if this block is too old, return an error so we reconnect
-                if x.lag() > 0 {
-                    // TODO: instead of a full reconnect, we should maybe just set it to None
-                    return Err(anyhow::anyhow!("{} is lagged: {:?}", conn, x));
+                let current_lag = x.lag();
+                if current_lag > 0 {
+                    let level = if warned == 0 {
+                        log::Level::Warn
+                    } else if current_lag % 1000 == 0 {
+                        log::Level::Debug
+                    } else {
+                        log::Level::Trace
+                    };
+
+                    log::log!(
+                        level,
+                        "{} is lagged {} secs: {} {}",
+                        conn,
+                        current_lag,
+                        x.number(),
+                        x.hash(),
+                    );
+
+                    warned += 1;
+                } else {
+                    // reset warnings now that we are connected
+                    warned = 0;
                 }
             }

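Rather than forcing a full reconnect whenever the head block lags (the old `return Err(...)`), the health check now just logs, at a level that decays with repetition: the first lagged check warns, every thousandth lag value surfaces at debug, and everything else is trace. A self-contained sketch of that escalation policy; the sample lag values and `env_logger` setup are illustrative, not from the repo:

```rust
// Assumes the `log` crate plus a backend such as `env_logger` in Cargo.toml.
fn log_lag(conn: &str, current_lag: u64, warned: &mut usize) {
    if current_lag > 0 {
        let level = if *warned == 0 {
            log::Level::Warn // first offense: be loud
        } else if current_lag % 1000 == 0 {
            log::Level::Debug // periodic reminder while still lagged
        } else {
            log::Level::Trace // otherwise stay quiet
        };

        log::log!(level, "{} is lagged {} secs", conn, current_lag);

        *warned += 1;
    } else {
        // caught back up: the next lag will warn loudly again
        *warned = 0;
    }
}

fn main() {
    env_logger::init();

    let mut warned = 0;
    for lag in [5, 6, 1000, 0, 3] {
        log_lag("example-rpc", lag, &mut warned);
    }
}
```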
@@ -658,7 +686,7 @@ impl Web3Connection {
                 }
                 Err(err) => {
                     if reconnect {
-                        warn!("{} connected ended. err={:?}", self, err);
+                        warn!("{} connection ended. err={:?}", self, err);

                         self.clone()
                             .retrying_connect(
@@ -691,7 +719,16 @@ impl Web3Connection {
     ) -> anyhow::Result<()> {
         trace!("watching new heads on {}", self);

-        if let ProviderState::Ready(provider) = &*self.provider_state.read().await {
+        // trace!("locking on new heads");
+        let provider_state = self
+            .provider_state
+            .try_read()
+            .context("subscribe_new_heads")?
+            .clone();
+        // trace!("unlocked on new heads");
+
+        // TODO: need a timeout
+        if let ProviderState::Ready(provider) = provider_state {
             match provider.as_ref() {
                 Web3Provider::Mock => unimplemented!(),
                 Web3Provider::Http(_provider) => {
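The same deadlock shows up here in a second form: the old code held a read guard on `provider_state` for the entire life of the `if let` body, i.e. for as long as the subscription ran, which blocked any writer such as `reconnect`. The new code clones the state out, lets the guard drop at the end of the statement, and matches on the owned copy. This is why `ProviderState` gained `Clone` in the first hunk of this file, and it is what the commit title means by "clones to avoid deadlock". A reduced sketch of the pattern:

```rust
use anyhow::Context;
use std::sync::Arc;
use tokio::sync::RwLock;

// Stand-ins for ProviderState / Web3Provider; just enough to show the shape.
#[allow(dead_code)]
#[derive(Clone, Debug)]
enum State {
    None,
    Ready(Arc<String>),
}

async fn subscribe(state: &RwLock<State>) -> anyhow::Result<()> {
    // Clone the state out; the read guard drops at the end of this statement,
    // so the long-lived work below runs without holding the lock.
    let snapshot = state.try_read().context("subscribe")?.clone();

    if let State::Ready(provider) = snapshot {
        // long-lived subscription work; writers of `state` are not blocked
        println!("subscribed via {}", provider);
    }

    Ok(())
}

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let state = RwLock::new(State::Ready(Arc::new("ws://example".to_string())));
    subscribe(&state).await
}
```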
@@ -863,7 +900,12 @@ impl Web3Connection {
         authorization: Arc<Authorization>,
         tx_id_sender: flume::Sender<(TxHash, Arc<Self>)>,
     ) -> anyhow::Result<()> {
-        if let ProviderState::Ready(provider) = &*self.provider_state.read().await {
+        if let ProviderState::Ready(provider) = self
+            .provider_state
+            .try_read()
+            .context("subscribe_pending_transactions")?
+            .clone()
+        {
             trace!("watching pending transactions on {}", self);
             match provider.as_ref() {
                 Web3Provider::Mock => unimplemented!(),
@@ -974,18 +1016,6 @@ impl Web3Connection {
         // TODO? ready_provider: Option<&Arc<Web3Provider>>,
         allow_not_ready: bool,
     ) -> anyhow::Result<OpenRequestResult> {
-        if self
-            .provider_state
-            .read()
-            .await
-            .provider(allow_not_ready)
-            .await
-            .is_none()
-        {
-            // TODO: emit a stat?
-            return Ok(OpenRequestResult::NotReady);
-        }
-
         // check rate limits
         if let Some(ratelimiter) = self.hard_limit.as_ref() {
             // TODO: how should we know if we should set expire or not?
@@ -1008,7 +1038,7 @@ impl Web3Connection {
             }
         };

-        let handle = OpenRequestHandle::new(authorization.clone(), self.clone());
+        let handle = OpenRequestHandle::new(authorization.clone(), self.clone()).await;

         Ok(OpenRequestResult::Handle(handle))
     }
@@ -8,7 +8,7 @@ use entities::revert_log;
 use entities::sea_orm_active_enums::Method;
 use ethers::providers::{HttpClientError, ProviderError, WsClientError};
 use ethers::types::{Address, Bytes};
-use log::{debug, error, trace, warn, Level};
+use log::{debug, error, info, trace, warn, Level};
 use metered::metered;
 use metered::HitCount;
 use metered::ResponseTime;
@@ -37,6 +37,7 @@ pub struct OpenRequestHandle {
     conn: Arc<Web3Connection>,
     // TODO: this is the same metrics on the conn. use a reference?
     metrics: Arc<OpenRequestHandleMetrics>,
+    provider: Arc<Web3Provider>,
     used: AtomicBool,
 }

@@ -129,7 +130,7 @@ impl Authorization {

 #[metered(registry = OpenRequestHandleMetrics, visibility = pub)]
 impl OpenRequestHandle {
-    pub fn new(authorization: Arc<Authorization>, conn: Arc<Web3Connection>) -> Self {
+    pub async fn new(authorization: Arc<Authorization>, conn: Arc<Web3Connection>) -> Self {
         // TODO: take request_id as an argument?
         // TODO: attach a unique id to this? customer requests have one, but not internal queries
         // TODO: what ordering?!
@@ -137,6 +138,38 @@ impl OpenRequestHandle {
         // TODO: these should maybe be sent to an influxdb instance?
         conn.active_requests.fetch_add(1, atomic::Ordering::Relaxed);

+        let mut provider = None;
+        let mut logged = false;
+        while provider.is_none() {
+            // trace!("waiting on provider: locking...");
+
+            let ready_provider = conn
+                .provider_state
+                .read()
+                .await
+                // TODO: hard code true, or take a bool in the `new` function?
+                .provider(true)
+                .await
+                .cloned();
+            // trace!("waiting on provider: unlocked!");
+
+            match ready_provider {
+                None => {
+                    if !logged {
+                        logged = true;
+                        warn!("no provider for {}!", conn);
+                    }
+
+                    // TODO: how should this work? a reconnect should be in progress. but maybe force one now?
+                    // TODO: sleep how long? subscribe to something instead? maybe use a watch handle?
+                    // TODO: this is going to be way too verbose!
+                    sleep(Duration::from_millis(100)).await
+                }
+                Some(x) => provider = Some(x),
+            }
+        }
+        let provider = provider.expect("provider was checked already");
+
         // TODO: handle overflows?
         // TODO: what ordering?
         match authorization.as_ref().authorization_type {
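The wait-for-a-provider loop that used to live in `request` (removed in a later hunk below) now runs once, here in the constructor, so each handle resolves its provider a single time instead of on every RPC call. A standalone sketch of the poll-until-ready idiom; as the TODOs note, a real implementation might prefer a `tokio::sync::watch` channel over a fixed sleep:

```rust
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::RwLock;
use tokio::time::sleep;

// Poll a shared slot until something publishes a value into it. In the diff
// this is conn.provider_state.read().await.provider(true).await.cloned().
async fn wait_for_provider(slot: &RwLock<Option<Arc<String>>>) -> Arc<String> {
    let mut logged = false;
    loop {
        // clone out and drop the guard before doing anything slow
        let current = slot.read().await.clone();
        if let Some(provider) = current {
            return provider;
        }
        if !logged {
            logged = true;
            eprintln!("no provider yet!");
        }
        // crude: sleeping instead of being woken exactly when it appears
        sleep(Duration::from_millis(100)).await;
    }
}

#[tokio::main]
async fn main() {
    let slot = Arc::new(RwLock::new(None));

    let writer = slot.clone();
    tokio::spawn(async move {
        sleep(Duration::from_millis(250)).await;
        *writer.write().await = Some(Arc::new("ws://example".to_string()));
    });

    let provider = wait_for_provider(&slot).await;
    println!("got {}", provider);
}
```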
@@ -157,6 +190,7 @@ impl OpenRequestHandle {
             authorization,
             conn,
             metrics,
+            provider,
             used,
         }
     }
@@ -193,41 +227,10 @@ impl OpenRequestHandle {
         // the authorization field is already on a parent span
         // trace!(rpc=%self.conn, %method, "request");

-        let mut provider = None;
-        let mut logged = false;
-        while provider.is_none() {
-            let ready_provider = self
-                .conn
-                .provider_state
-                .read()
-                .await
-                // TODO: hard code true, or take a bool in the `new` function?
-                .provider(true)
-                .await
-                .cloned();
-
-            match ready_provider {
-                None => {
-                    if !logged {
-                        logged = true;
-                        warn!("no provider for {}!", self.conn);
-                    }
-
-                    // TODO: how should this work? a reconnect should be in progress. but maybe force one now?
-                    // TODO: sleep how long? subscribe to something instead? maybe use a watch handle?
-                    // TODO: this is going to be way too verbose!
-                    sleep(Duration::from_millis(100)).await
-                }
-                Some(x) => provider = Some(x),
-            }
-        }
-
-        let provider = provider.expect("provider was checked already");
-
         // trace!("got provider for {:?}", self);

         // TODO: really sucks that we have to clone here
-        let response = match &*provider {
+        let response = match &*self.provider {
             Web3Provider::Mock => unimplemented!(),
             Web3Provider::Http(provider) => provider.request(method, params).await,
             Web3Provider::Ws(provider) => provider.request(method, params).await,
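With the provider cloned into the handle at construction, the hot path reads `self.provider` directly: no lock acquisition per request, and no way for a request to deadlock against `reconnect`. The trade-off is that a handle created before a reconnect keeps using the provider it captured. A sketch contrasting the two shapes, with types simplified to `String` providers:

```rust
use std::sync::Arc;
use tokio::sync::RwLock;

struct Handle {
    // resolved once in the constructor; request() never touches a lock
    provider: Arc<String>,
}

impl Handle {
    // after the commit: plain field access, no await, no deadlock risk
    fn request(&self) -> &str {
        &self.provider
    }
}

// before the commit (for contrast): every request locked the shared state
async fn request_with_lock(state: &RwLock<Option<Arc<String>>>) -> Option<Arc<String>> {
    state.read().await.clone()
}

#[tokio::main]
async fn main() {
    let handle = Handle {
        provider: Arc::new("ws://example".to_string()),
    };
    println!("{}", handle.request());

    let state = RwLock::new(Some(Arc::new("ws://example".to_string())));
    assert!(request_with_lock(&state).await.is_some());
}
```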
@@ -273,7 +276,7 @@ impl OpenRequestHandle {
                 // check for "execution reverted" here
                 let is_revert = if let ProviderError::JsonRpcClientError(err) = err {
                     // Http and Ws errors are very similar, but different types
-                    let msg = match &*provider {
+                    let msg = match &*self.provider {
                         Web3Provider::Mock => unimplemented!(),
                         Web3Provider::Http(_) => {
                             if let Some(HttpClientError::JsonRpcError(err)) =