web3-proxy/web3_proxy/src/sub_commands/proxyd.rs

use crate::app::{flatten_handle, flatten_handles, Web3ProxyApp};
use crate::compute_units::default_usd_per_cu;
use crate::config::TopConfig;
use crate::globals::global_db_conn;
use crate::stats::FlushedStats;
use crate::{frontend, prometheus};
use argh::FromArgs;
use futures::StreamExt;
use num::Zero;
use std::path::PathBuf;
use std::sync::atomic::AtomicU16;
use std::sync::Arc;
use std::time::Duration;
use std::{fs, thread};
use tokio::select;
use tokio::sync::{broadcast, mpsc, oneshot};
use tokio::time::{sleep_until, Instant};
use tracing::{error, info, trace, warn};

/// start the main proxy daemon
#[derive(FromArgs, PartialEq, Debug, Eq)]
#[argh(subcommand, name = "proxyd")]
pub struct ProxydSubCommand {
    /// path to a toml of rpc servers
    /// what port the proxy should listen on
    #[argh(option, default = "8544")]
    pub port: u16,

    /// what port the proxy should expose prometheus stats on
    #[argh(option, default = "8543")]
    pub prometheus_port: u16,
}

impl ProxydSubCommand {
    pub async fn main(
        self,
        top_config: TopConfig,
        top_config_path: PathBuf,
        num_workers: usize,
    ) -> anyhow::Result<()> {
        let (frontend_shutdown_sender, _) = broadcast::channel(1);
        // TODO: i think there is a small race. if config_path changes

        let frontend_port = Arc::new(self.port.into());
        let prometheus_port = Arc::new(self.prometheus_port.into());
        let (flush_stat_buffer_sender, flush_stat_buffer_receiver) = mpsc::channel(8);

        Self::_main(
            top_config,
            Some(top_config_path),
            frontend_port,
            prometheus_port,
            num_workers,
            frontend_shutdown_sender,
            flush_stat_buffer_sender,
            flush_stat_buffer_receiver,
        )
        .await
    }

    /// this shouldn't really be pub except it makes test fixtures easier
    #[allow(clippy::too_many_arguments)]
    pub async fn _main(
        mut top_config: TopConfig,
        top_config_path: Option<PathBuf>,
        frontend_port: Arc<AtomicU16>,
        prometheus_port: Arc<AtomicU16>,
        num_workers: usize,
        frontend_shutdown_sender: broadcast::Sender<()>,
        flush_stat_buffer_sender: mpsc::Sender<oneshot::Sender<FlushedStats>>,
        flush_stat_buffer_receiver: mpsc::Receiver<oneshot::Sender<FlushedStats>>,
    ) -> anyhow::Result<()> {
        // TODO: this is gross but it works. i'd rather it be called by serde, but it needs to know the chain id
        if top_config.app.usd_per_cu.is_none() {
            top_config.app.usd_per_cu = Some(default_usd_per_cu(top_config.app.chain_id));
        }

        // tokio has code for catching ctrl+c so we use that to shut down in most cases
        // frontend_shutdown_sender is currently only used in tests, but we might make a /shutdown endpoint or something
        // we do not need this receiver. new receivers are made by `shutdown_sender.subscribe()`
        let (app_shutdown_sender, _app_shutdown_receiver) = broadcast::channel(1);

        let frontend_shutdown_receiver = frontend_shutdown_sender.subscribe();
        let prometheus_shutdown_receiver = app_shutdown_sender.subscribe();

        // TODO: should we use a watch or broadcast for these?
        let (frontend_shutdown_complete_sender, mut frontend_shutdown_complete_receiver) =
            broadcast::channel(1);

        // start the main app
        let mut spawned_app = Web3ProxyApp::spawn(
            frontend_port,
            prometheus_port,
            top_config.clone(),
            num_workers,
            app_shutdown_sender.clone(),
            flush_stat_buffer_sender,
            flush_stat_buffer_receiver,
        )
        .await?;

        let mut head_block_receiver = spawned_app.app.head_block_receiver();

        // start thread for watching config
        if let Some(top_config_path) = top_config_path {
            let config_sender = spawned_app.new_top_config;
            {
                let mut current_config = config_sender.borrow().clone();

                thread::spawn(move || loop {
                    match fs::read_to_string(&top_config_path) {
                        Ok(new_top_config) => match toml::from_str::<TopConfig>(&new_top_config) {
                            Ok(mut new_top_config) => {
                                if new_top_config.app.usd_per_cu.is_none() {
                                    new_top_config.app.usd_per_cu =
                                        Some(default_usd_per_cu(new_top_config.app.chain_id));
                                }

                                if new_top_config != current_config {
                                    trace!("current_config: {:#?}", current_config);
                                    trace!("new_top_config: {:#?}", new_top_config);

                                    // TODO: print the differences
                                    // TODO: first run seems to always see differences. why?
                                    info!("config @ {:?} changed", top_config_path);
                                    config_sender.send(new_top_config.clone()).unwrap();
                                    current_config = new_top_config;
                                }
                            }
                            Err(err) => {
                                // TODO: panic?
                                error!("Unable to parse config! {:#?}", err);
                            }
                        },
                        Err(err) => {
                            // TODO: panic?
                            error!("Unable to read config! {:#?}", err);
                        }
                    }

                    thread::sleep(Duration::from_secs(10));
                });
            }
        }

        // start the prometheus metrics port
        let prometheus_handle = tokio::spawn(prometheus::serve(
            spawned_app.app.clone(),
            prometheus_shutdown_receiver,
        ));

        if spawned_app.app.config.db_url.is_some() {
            // give 30 seconds for the db to connect. if it does not connect, it will keep retrying
        }

        info!("waiting for head block");
        let max_wait_until = Instant::now() + Duration::from_secs(35);
        loop {
            select! {
                _ = sleep_until(max_wait_until) => {
                    return Err(anyhow::anyhow!("oh no! we never got a head block!"))
                }
                _ = head_block_receiver.changed() => {
                    if let Some(head_block) = spawned_app
                        .app
                        .head_block_receiver()
                        .borrow_and_update()
                        .as_ref()
                    {
                        info!(head_hash=?head_block.hash(), head_num=%head_block.number());
                        break;
                    } else {
                        info!("no head block yet!");
                        continue;
                    }
                }
            }
        }

        // start the frontend port
        let frontend_handle = tokio::spawn(frontend::serve(
            spawned_app.app.clone(),
            frontend_shutdown_receiver,
            frontend_shutdown_complete_sender,
        ));

        let frontend_handle = flatten_handle(frontend_handle);

        // if everything is working, these should all run forever
        let mut exited_with_err = false;
        let mut frontend_exited = false;
        tokio::select! {
            x = flatten_handles(spawned_app.app_handles) => {
                match x {
                    Ok(_) => info!("app_handle exited"),
                    Err(e) => {
                        error!("app_handle exited: {:#?}", e);
                        exited_with_err = true;
                    }
                }
            }
            x = frontend_handle => {
                frontend_exited = true;
                match x {
                    Ok(_) => info!("frontend exited"),
                    Err(e) => {
                        error!("frontend exited: {:#?}", e);
                        exited_with_err = true;
                    }
                }
            }
            x = flatten_handle(prometheus_handle) => {
                match x {
                    Ok(_) => info!("prometheus exited"),
                    Err(e) => {
                        error!("prometheus exited: {:#?}", e);
                        exited_with_err = true;
                    }
                }
            }
            x = tokio::signal::ctrl_c() => {
                // TODO: unix terminate signal, too
                match x {
                    Ok(_) => info!("quiting from ctrl-c"),
                    Err(e) => {
                        // TODO: i don't think this is possible
                        error!("error quiting from ctrl-c: {:#?}", e);
                        exited_with_err = true;
                    }
                }
            }
            // TODO: This seems to have been removed on the main branch
            // TODO: how can we properly watch background handles here? this returns None immediatly and the app exits. i think the bug is somewhere else though
            x = spawned_app.background_handles.next() => {
                match x {
                    Some(Ok(_)) => info!("quiting from background handles"),
                    Some(Err(e)) => {
                        error!("quiting from background handle error: {:#?}", e);
                        exited_with_err = true;
                    }
                    None => {
                        // TODO: is this an error?
                        warn!("background handles exited");
                    }
                }
            }
        };

        // TODO: This is also not there on the main branch
        // if a future above completed, make sure the frontend knows to start turning off
        if !frontend_exited {
            if let Err(err) = frontend_shutdown_sender.send(()) {
                // TODO: this is actually expected if the frontend is already shut down
                warn!(?err, "shutdown sender");
            };
        }

        // TODO: Also not there on main branch
        // TODO: wait until the frontend completes
        if let Err(err) = frontend_shutdown_complete_receiver.recv().await {
            warn!(?err, "shutdown completition");
        } else {
            info!("frontend exited gracefully");
        }

        // now that the frontend is complete, tell all the other futures to finish
        if let Err(err) = app_shutdown_sender.send(()) {
            warn!(?err, "backend sender");
        };

        info!(
            "waiting on {} important background tasks",
            spawned_app.background_handles.len()
        );
        let mut background_errors = 0;
        while let Some(x) = spawned_app.background_handles.next().await {
            match x {
                Err(e) => {
                    error!("{:?}", e);
                    background_errors += 1;
                }
                Ok(Err(e)) => {
                    error!("{:?}", e);
                    background_errors += 1;
                }
                Ok(Ok(_)) => {
                    // TODO: how can we know which handle exited?
                    trace!("a background handle exited");
                    continue;
                }
            }
        }

        // TODO: make sure this happens even if we exit with an error
        if let Ok(db_conn) = global_db_conn().await {
            /*
            From the sqlx docs:

            We recommend calling .close().await to gracefully close the pool and its connections when you are done using it.
            This will also wake any tasks that are waiting on an .acquire() call,
            so for long-lived applications it’s a good idea to call .close() during shutdown.
            */
            db_conn.close().await?;
        }

        if background_errors.is_zero() && !exited_with_err {
            info!("finished");
            Ok(())
        } else {
            // TODO: collect all the errors here instead?
            Err(anyhow::anyhow!("finished with errors!"))
        }
    }
}