web3-proxy/web3-proxy/src/connection.rs

//! Rate-limited communication with a web3 provider
use derive_more::From;
use ethers::prelude::{Block, Middleware, ProviderError, TxHash, H256};
use futures::StreamExt;
use governor::clock::{Clock, QuantaClock, QuantaInstant};
use governor::middleware::NoOpMiddleware;
use governor::state::{InMemoryState, NotKeyed};
use governor::NotUntil;
use governor::RateLimiter;
use std::fmt;
use std::num::NonZeroU32;
use std::sync::atomic::{self, AtomicU32};
use std::{cmp::Ordering, sync::Arc};
use tokio::sync::RwLock;
use tokio::task;
use tokio::time::{interval, sleep, timeout_at, Duration, Instant, MissedTickBehavior};
use tracing::{info, instrument, trace, warn};

/// The rate limiter type used for a single connection: not keyed, with in-memory state,
/// driven by a `QuantaClock`, and with no-op middleware.
type Web3RateLimiter =
    RateLimiter<NotKeyed, InMemoryState, QuantaClock, NoOpMiddleware<QuantaInstant>>;

/// The transport-specific ethers provider behind a connection.
///
/// `From` is derived, so either concrete provider type can be converted with `.into()`.
///
/// TODO: instead of an enum, I tried to use `Box<dyn Provider>`, but hit https://github.com/gakonst/ethers-rs/issues/592
#[derive(From)]
pub enum Web3Provider {
    /// a provider using the http transport
    Http(ethers::providers::Provider<ethers::providers::Http>),
    /// a provider using the websocket transport
    Ws(ethers::providers::Provider<ethers::providers::Ws>),
}

impl Web3Provider {
    /// Build a provider for `url_str`, choosing the transport from the start of the url.
    ///
    /// * urls starting with `http` use the http transport and require `http_client`
    /// * urls starting with `ws` open a websocket connection. `http_client` is ignored
    ///
    /// Both kinds of provider are given an interval of 1 second.
    /// Any other url is rejected with an error.
    #[instrument]
    async fn from_str(url_str: &str, http_client: Option<reqwest::Client>) -> anyhow::Result<Self> {
        // TODO: dry this up (needs https://github.com/gakonst/ethers-rs/issues/592)
        let poll_interval = Duration::from_secs(1);

        if url_str.starts_with("http") {
            // the url is parsed before the client is checked so that a bad url is reported first
            let parsed_url: url::Url = url_str.parse()?;
            let client = http_client.ok_or_else(|| anyhow::anyhow!("no http_client"))?;

            let transport = ethers::providers::Http::new_with_client(parsed_url, client);

            return Ok(ethers::providers::Provider::new(transport)
                .interval(poll_interval)
                .into());
        }

        if url_str.starts_with("ws") {
            // TODO: wrapper automatically reconnect
            // TODO: make sure this automatically reconnects
            let transport = ethers::providers::Ws::connect(url_str).await?;

            return Ok(ethers::providers::Provider::new(transport)
                .interval(poll_interval)
                .into());
        }

        Err(anyhow::anyhow!("only http and ws servers are supported"))
    }
}

impl fmt::Debug for Web3Provider {
    /// Prints only the type name. No fields are shown.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // TODO: the default Debug takes forever to write. this is too quiet though. we at least need the url
        let mut terse = f.debug_struct("Web3Provider");

        terse.finish_non_exhaustive()
    }
}

/// An active connection to a Web3Rpc
///
/// Tracks how many requests are in flight, optionally enforces a hard rate limit, and
/// holds the provider that requests are sent through.
pub struct Web3Connection {
    /// the url that the provider was created from. also used when reconnecting
    /// TODO: can we get this from the provider? do we even need it?
    url: String,
    /// keep track of currently open requests. We sort on this
    active_requests: AtomicU32,
    /// held in a RwLock so that `reconnect` can swap in a new provider
    provider: RwLock<Arc<Web3Provider>>,
    /// optional hard rate limit. when this is `None`, request handles are never refused
    ratelimiter: Option<Web3RateLimiter>,
    /// used for load balancing to the least loaded server
    soft_limit: u32,
    /// the same clock that is used by the rate limiter
    clock: QuantaClock,
}

impl fmt::Debug for Web3Connection {
    /// Only the url is printed. Every other field is elided.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut debug_struct = f.debug_struct("Web3Connection");

        debug_struct.field("url", &self.url);

        debug_struct.finish_non_exhaustive()
    }
}

impl fmt::Display for Web3Connection {
    /// A connection is displayed as its url, without applying any padding or alignment.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.url)
    }
}

impl Web3Connection {
    /// Replace the current provider with a freshly connected one.
    ///
    /// The provider lock is held for writing during the whole reconnect. Before connecting,
    /// a block number of 0 with the default hash is sent to `block_sender` for this connection.
    ///
    /// An error is returned if the block cannot be sent or if the new provider cannot be
    /// created. In both cases the old provider is left in place.
    #[instrument(skip_all)]
    pub async fn reconnect(
        self: &Arc<Self>,
        block_sender: &flume::Sender<(u64, H256, Arc<Self>)>,
    ) -> anyhow::Result<()> {
        // tokio's lock is used because this guard stays alive across the awaits below
        let mut provider_guard = self.provider.write().await;

        // TODO: tell the block subscriber that we are at 0
        let reset_head = (0, H256::default(), Arc::clone(self));
        block_sender.send_async(reset_head).await?;

        // no http client is passed along. websocket doesn't need the http client
        let fresh_provider = Web3Provider::from_str(&self.url, None).await?;
        *provider_guard = Arc::new(fresh_provider);

        Ok(())
    }

    /// Connect to a web3 rpc and subscribe to new heads
    #[instrument(name = "try_new_Web3Connection", skip(clock, http_client))]
    pub async fn try_new(
        chain_id: usize,
        url_str: String,
        // optional because this is only used for http providers. websocket providers don't use it
        http_client: Option<reqwest::Client>,
        hard_rate_limit: Option<u32>,
        clock: &QuantaClock,
        // TODO: think more about this type
        soft_limit: u32,
    ) -> anyhow::Result<Arc<Web3Connection>> {
        let hard_rate_limiter = if let Some(hard_rate_limit) = hard_rate_limit {
            let quota = governor::Quota::per_second(NonZeroU32::new(hard_rate_limit).unwrap());

            let rate_limiter = governor::RateLimiter::direct_with_clock(quota, clock);

            Some(rate_limiter)
        } else {
            None
        };

        let provider = Web3Provider::from_str(&url_str, http_client).await?;

        let connection = Web3Connection {
            clock: clock.clone(),
            url: url_str.clone(),
            active_requests: 0.into(),
            provider: RwLock::new(Arc::new(provider)),
            ratelimiter: hard_rate_limiter,
            soft_limit,
        };

        let connection = Arc::new(connection);

        // check the server's chain_id here
        let active_request_handle = connection.wait_for_request_handle().await;
        // TODO: some public rpcs (on bsc and fantom) do not return an id and so this ends up being an error
        let found_chain_id: Result<String, _> = active_request_handle
            .request("eth_chainId", Option::None::<()>)
            .await;

        match found_chain_id {
            Ok(found_chain_id) => {
                let found_chain_id =
                    usize::from_str_radix(found_chain_id.trim_start_matches("0x"), 16).unwrap();

                if chain_id != found_chain_id {
                    return Err(anyhow::anyhow!(
                        "incorrect chain id! Expected {}. Found {}",
                        chain_id,
                        found_chain_id
                    ));
                }
            }
            Err(e) => {
                let e = anyhow::Error::from(e).context(format!("{}", connection));
                return Err(e);
            }
        }

        info!("Successful connection: {}", connection);

        Ok(connection)
    }

    /// The current value of the counter of open requests on this connection.
    #[inline]
    pub fn active_requests(&self) -> u32 {
        self.active_requests.load(atomic::Ordering::Acquire)
    }

    /// The soft limit that this connection was created with.
    #[inline]
    pub fn soft_limit(&self) -> u32 {
        self.soft_limit
    }

    /// The url that this connection was created with.
    #[inline]
    pub fn url(&self) -> &str {
        &self.url
    }

    /// Forward the number and hash of `block` to `block_sender`, tagged with this connection.
    ///
    /// Nothing is sent when `block` is an error or when the block is missing its number or
    /// its hash. Both cases are logged as warnings instead.
    ///
    /// # Panics
    ///
    /// Panics if the block cannot be sent on `block_sender`.
    #[instrument(skip_all)]
    async fn send_block(
        self: &Arc<Self>,
        block: Result<Block<TxHash>, ProviderError>,
        block_sender: &flume::Sender<(u64, H256, Arc<Self>)>,
    ) {
        let block = match block {
            Ok(block) => block,
            Err(e) => {
                warn!("unable to get block from {}: {}", self, e);
                return;
            }
        };

        // both fields are optional and filled in by the remote server, so don't unwrap them
        let (block_number, block_hash) = match (block.number, block.hash) {
            (Some(number), Some(hash)) => (number.as_u64(), hash),
            _ => {
                warn!("block from {} is missing its number or hash", self);
                return;
            }
        };

        // TODO: i'm pretty sure we don't need send_async, but double check
        // a failed send still panics, exactly as before
        block_sender
            .send_async((block_number, block_hash, self.clone()))
            .await
            .unwrap();
    }

    /// Subscribe to new blocks and forward each new head to `block_sender`.
    ///
    /// How new heads are found depends on the provider:
    /// * http providers request the latest block every 2 seconds and skip blocks with the
    ///   same hash as the previous one. That polling loop has no exit, so for http providers
    ///   this keeps running regardless of `reconnect`.
    /// * websocket providers subscribe to new blocks, send the current latest block once, and
    ///   then forward blocks from the stream until it ends or 300 seconds pass without a block.
    ///
    /// If `reconnect` is true, this runs forever: after the websocket subscription exits, it
    /// waits 10 seconds, calls `reconnect`, and subscribes again. Otherwise it returns `Ok(())`.
    ///
    /// An error is returned if subscribing to blocks fails or if reconnecting fails.
    /// TODO: instrument with the url
    #[instrument(skip_all)]
    pub async fn subscribe_new_heads(
        self: Arc<Self>,
        block_sender: flume::Sender<(u64, H256, Arc<Self>)>,
        reconnect: bool,
    ) -> anyhow::Result<()> {
        loop {
            info!("Watching new_heads on {}", self);

            // clone the Arc out of the lock so that the read guard is released right away
            // TODO: is a RwLock of Arc the right thing here?
            let provider = self.provider.read().await.clone();

            match &*provider {
                Web3Provider::Http(provider) => {
                    // there is a "watch_blocks" function, but a lot of public nodes do not support the necessary rpc endpoints
                    // TODO: what should this interval be? probably some fraction of block time. set automatically?
                    // TODO: maybe it would be better to have one interval for all of the http providers, but this works for now
                    // TODO: if there are some websocket providers, maybe have a longer interval and a channel that tells the https to update when a websocket gets a new head? if they are slow this wouldn't work well though
                    let mut interval = interval(Duration::from_secs(2));
                    interval.set_missed_tick_behavior(MissedTickBehavior::Delay);

                    // hash of the last block that was forwarded. starts as the default hash
                    let mut last_hash = Default::default();

                    loop {
                        // wait for the interval
                        // TODO: if error or rate limit, increase interval?
                        interval.tick().await;

                        // if no request handle is available, this tick is skipped with a warning
                        match self.try_request_handle() {
                            Ok(active_request_handle) => {
                                // TODO: i feel like this should be easier. there is a provider.getBlock, but i don't know how to give it "latest"
                                let block: Result<Block<TxHash>, _> = provider
                                    .request("eth_getBlockByNumber", ("latest", false))
                                    .await;

                                // the request is finished, so stop counting it as active
                                drop(active_request_handle);

                                // don't send repeat blocks
                                if let Ok(block) = &block {
                                    // NOTE(review): this panics if the returned block has no hash
                                    let new_hash = block.hash.unwrap();

                                    if new_hash == last_hash {
                                        continue;
                                    }

                                    last_hash = new_hash;
                                }

                                // errors are passed along too. send_block logs them
                                self.send_block(block, &block_sender).await;
                            }
                            Err(e) => {
                                warn!("Failed getting latest block from {}: {:?}", self, e);
                            }
                        }
                    }
                }
                Web3Provider::Ws(provider) => {
                    // rate limits
                    let active_request_handle = self.wait_for_request_handle().await;

                    // TODO: automatically reconnect?
                    // TODO: it would be faster to get the block number, but subscriptions don't provide that
                    // TODO: maybe we can do provider.subscribe("newHeads") and then parse into a custom struct that only gets the number out?
                    // NOTE(review): a failure here returns from the whole function, even when `reconnect` is true
                    let mut stream = provider.subscribe_blocks().await?;

                    // the subscribe request is done. take a new handle for the next request
                    drop(active_request_handle);
                    let active_request_handle = self.wait_for_request_handle().await;

                    // query the block once since the subscription doesn't send the current block
                    // there is a very small race condition here where the stream could send us a new block right now
                    // all it does is print "new block" for the same block as current block
                    // TODO: rate limit!
                    let block: Result<Block<TxHash>, _> = active_request_handle
                        .request("eth_getBlockByNumber", ("latest", false))
                        .await;

                    self.send_block(block, &block_sender).await;

                    // TODO: what should this timeout be? needs to be larger than worst case block time
                    // TODO: although reconnects will make this less of an issue
                    loop {
                        // the 300 second deadline is recalculated for every block
                        match timeout_at(Instant::now() + Duration::from_secs(300), stream.next())
                            .await
                        {
                            Ok(Some(new_block)) => {
                                self.send_block(Ok(new_block), &block_sender).await;

                                // TODO: really not sure about this
                                task::yield_now().await;
                            }
                            Ok(None) => {
                                // the stream has no more blocks
                                warn!("subscription ended");
                                break;
                            }
                            Err(e) => {
                                // the deadline passed before the stream produced a block
                                warn!("subscription ended with an error: {:?}", e);
                                break;
                            }
                        }
                    }
                }
            }

            if reconnect {
                // release our reference to the old provider before replacing it
                drop(provider);

                // TODO: exponential backoff
                warn!("new heads subscription exited. reconnecting in 10 seconds...");
                sleep(Duration::from_secs(10)).await;

                self.reconnect(&block_sender).await?;
            } else {
                break;
            }
        }

        info!("Done watching new_heads on {}", self);
        Ok(())
    }

    /// Wait until the rate limiter allows another request and return a handle for it.
    ///
    /// Each failed attempt sleeps for as long as the rate limiter says to wait.
    ///
    /// # Panics
    ///
    /// Panics if no handle could be acquired after 10 attempts.
    #[instrument(skip_all)]
    pub async fn wait_for_request_handle(self: &Arc<Self>) -> ActiveRequestHandle {
        // TODO: maximum wait time
        let mut attempts_left = 10;

        while attempts_left > 0 {
            let not_until = match self.try_request_handle() {
                Ok(request_handle) => return request_handle,
                Err(not_until) => not_until,
            };

            // the wait time is measured with the same clock that the rate limiter uses
            let wait_time = not_until.wait_time_from(self.clock.now());
            sleep(wait_time).await;

            attempts_left -= 1;
        }

        // TODO: what should we do?
        panic!("no request handle after 10 tries");
    }

    /// Try to get a request handle without waiting.
    ///
    /// Connections without a rate limiter always get a handle. Otherwise the rate limiter is
    /// checked first. If it is exhausted, a warning is logged and the `NotUntil` from the
    /// rate limiter is returned as the error.
    pub fn try_request_handle(
        self: &Arc<Self>,
    ) -> Result<ActiveRequestHandle, NotUntil<QuantaInstant>> {
        // `None` when there is no rate limiter. Otherwise the outcome of the rate limit check
        let rate_limit_check = self.ratelimiter.as_ref().map(|limiter| limiter.check());

        if let Some(Err(not_until)) = rate_limit_check {
            // TODO: use tracing better
            warn!("Exhausted rate limit on {:?}: {}", self, not_until);

            return Err(not_until);
        }

        Ok(ActiveRequestHandle::new(self.clone()))
    }
}

/// Marks one open request on a connection.
///
/// Creating a handle increments the connection's `active_requests` counter and dropping the
/// handle decrements it again, so drop this once the request completes.
pub struct ActiveRequestHandle(Arc<Web3Connection>);

impl ActiveRequestHandle {
    /// Count a new open request on `connection` and wrap the connection in a handle.
    fn new(connection: Arc<Web3Connection>) -> Self {
        // TODO: attach a unique id to this
        // TODO: what ordering?!
        let open_requests = &connection.active_requests;
        open_requests.fetch_add(1, atomic::Ordering::AcqRel);

        Self(connection)
    }

    /// Send a web3 request through the connection's current provider.
    ///
    /// Because requests are sent through a handle, the rate limiter has already been checked
    /// and the count of open requests has already been incremented.
    /// The handle is only borrowed. The caller drops it once the request is complete.
    #[instrument(skip_all)]
    pub async fn request<T, R>(
        &self,
        method: &str,
        params: T,
    ) -> Result<R, ethers::prelude::ProviderError>
    where
        T: fmt::Debug + serde::Serialize + Send + Sync,
        R: serde::Serialize + serde::de::DeserializeOwned + fmt::Debug,
    {
        let connection = &self.0;

        // TODO: use tracing spans properly
        // TODO: it would be nice to have the request id on this
        // TODO: including params in this is way too verbose
        trace!("Sending {} to {}", method, connection);

        // clone the Arc so that the read lock is released before the request is sent
        let provider = Arc::clone(&*connection.provider.read().await);

        let response = match provider.as_ref() {
            Web3Provider::Http(http) => http.request(method, params).await,
            Web3Provider::Ws(ws) => ws.request(method, params).await,
        };

        // TODO: i think ethers already has trace logging (and does it much more fancy)
        // TODO: at least instrument this with more useful information
        trace!("Reply from {}", connection);

        response
    }
}

impl Drop for ActiveRequestHandle {
    fn drop(&mut self) {
        self.0
            .active_requests
            .fetch_sub(1, atomic::Ordering::AcqRel);
    }
}

impl Eq for Web3Connection {}

/// Connections are ordered by load: the number of active requests relative to the soft limit.
/// The least loaded connection sorts first.
impl Ord for Web3Connection {
    /// Compare `active_requests / soft_limit` of both connections without using floats.
    ///
    /// A connection with a soft limit of 0 has no capacity, so it sorts after every connection
    /// that has a soft limit. Two connections with a soft limit of 0 compare as equal.
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // TODO: what atomic ordering?!
        let a = u64::from(self.active_requests.load(atomic::Ordering::Acquire));
        let b = u64::from(other.active_requests.load(atomic::Ordering::Acquire));

        let a_limit = u64::from(self.soft_limit);
        let b_limit = u64::from(other.soft_limit);

        match (a_limit, b_limit) {
            (0, 0) => Ordering::Equal,
            (0, _) => Ordering::Greater,
            (_, 0) => Ordering::Less,
            // a / a_limit <=> b / b_limit, cross-multiplied so that the comparison is exact.
            // the product of two u32 values always fits in a u64
            _ => (a * b_limit).cmp(&(b * a_limit)),
        }
    }
}

impl PartialOrd for Web3Connection {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

/// note that this is just comparing the load. two providers with different rpc urls are equal
/// if they have the same ratio of active requests to soft limit!
impl PartialEq for Web3Connection {
    fn eq(&self, other: &Self) -> bool {
        // equality has to agree with `cmp`, so it is defined in terms of it
        self.cmp(other) == Ordering::Equal
    }
}