error on max_head_block_age and keep the sender alive longer

Bryan Stitt 2023-07-10 22:17:15 -07:00
parent 1af023d689
commit 3059d08674
8 changed files with 32 additions and 7 deletions
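
The "keep the sender alive longer" half of this commit threads a `flush_stat_buffer_sender` through the app and parks a clone of it in the `StatBuffer` as `_flush_sender`. A minimal standalone sketch (not the proxy's actual types) of the flume behavior this guards against: a `flume::Receiver` reports disconnection as soon as every `Sender` has been dropped, so a flush loop whose last external sender goes out of scope would see the channel as dead and exit early.

```rust
use tokio::sync::oneshot;

fn main() {
    // the same channel shape the commit uses for flush requests
    let (tx, rx) = flume::bounded::<oneshot::Sender<(usize, usize)>>(8);

    // another clone may come and go without disconnecting the channel...
    let tx2 = tx.clone();
    drop(tx2);
    assert!(!rx.is_disconnected());

    // ...but once the last sender drops (what holding `_flush_sender`
    // inside the StatBuffer prevents), the receiver reports disconnection
    // and a loop driven by it would stop.
    drop(tx);
    assert!(rx.is_disconnected());
}
```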

View File

@@ -179,6 +179,7 @@ impl Web3ProxyApp {
         top_config: TopConfig,
         num_workers: usize,
         shutdown_sender: broadcast::Sender<()>,
+        flush_stat_buffer_sender: flume::Sender<oneshot::Sender<(usize, usize)>>,
         flush_stat_buffer_receiver: flume::Receiver<oneshot::Sender<(usize, usize)>>,
     ) -> anyhow::Result<Web3ProxyAppSpawn> {
         let stat_buffer_shutdown_receiver = shutdown_sender.subscribe();
@@ -390,6 +391,7 @@ impl Web3ProxyApp {
             Some(user_balance_cache.clone()),
             stat_buffer_shutdown_receiver,
             1,
+            flush_stat_buffer_sender.clone(),
             flush_stat_buffer_receiver,
         )? {
             // since the database entries are used for accounting, we want to be sure everything is saved before exiting

View File

@@ -279,6 +279,7 @@ impl Web3RpcConfig {
         http_client: Option<reqwest::Client>,
         blocks_by_hash_cache: BlocksByHashCache,
         block_sender: Option<flume::Sender<BlockAndRpc>>,
+        max_head_block_age: Duration,
         tx_id_sender: Option<flume::Sender<TxHashAndRpc>>,
     ) -> anyhow::Result<(Arc<Web3Rpc>, Web3ProxyJoinHandle<()>)> {
         if !self.extra.is_empty() {
@@ -295,6 +296,7 @@ impl Web3RpcConfig {
             block_interval,
             blocks_by_hash_cache,
             block_sender,
+            max_head_block_age,
             tx_id_sender,
         )
         .await

View File

@@ -221,6 +221,7 @@ impl Web3Rpcs {
             http_client,
             blocks_by_hash_cache,
             block_sender,
+            self.max_head_block_age,
             pending_tx_id_sender,
         ));

View File

@@ -71,6 +71,8 @@ pub struct Web3Rpc {
     pub(super) internal_requests: AtomicUsize,
     /// Track total external requests served
     pub(super) external_requests: AtomicUsize,
+    /// If the head block is too old, it is ignored.
+    pub(super) max_head_block_age: Duration,
     /// Track time used by external requests served
     /// request_ms_histogram is only inside an Option so that the "Default" derive works. it will always be set.
     pub(super) median_latency: Option<RollingQuantileLatency>,
@@ -97,6 +99,7 @@ impl Web3Rpc {
         block_interval: Duration,
         block_map: BlocksByHashCache,
         block_and_rpc_sender: Option<flume::Sender<BlockAndRpc>>,
+        max_head_block_age: Duration,
         tx_id_sender: Option<flume::Sender<(TxHash, Arc<Self>)>>,
     ) -> anyhow::Result<(Arc<Web3Rpc>, Web3ProxyJoinHandle<()>)> {
         let created_at = Instant::now();
@@ -193,6 +196,7 @@ impl Web3Rpc {
             hard_limit_until: Some(hard_limit_until),
             head_block: Some(head_block),
             http_provider,
+            max_head_block_age,
             name,
             peak_latency: Some(peak_latency),
             median_latency: Some(median_request_latency),
@@ -544,9 +548,12 @@ impl Web3Rpc {
         let head_block = self.head_block.as_ref().unwrap().borrow().clone();

         if let Some(head_block) = head_block {
-            let head_block = head_block.block;
-
             // TODO: if head block is very old and not expected to be syncing, emit warning
+            if head_block.age() > self.max_head_block_age {
+                return Err(anyhow::anyhow!("head_block is too old!").into());
+            }
+
+            let head_block = head_block.block;
+
             let block_number = head_block.number.context("no block number")?;
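
For the `max_head_block_age` half, the key line above is `head_block.age() > self.max_head_block_age`, checked before the block is unwrapped and used. A hedged sketch of what such an age check looks like, assuming the head block exposes its header timestamp as seconds since the Unix epoch (as Ethereum headers do); the `block_age` helper here is illustrative, not web3-proxy's actual implementation:

```rust
use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Illustrative only: derive a block's age from its header timestamp.
fn block_age(block_timestamp_secs: u64) -> Duration {
    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    // a block with a timestamp in the future counts as age zero
    now.checked_sub(Duration::from_secs(block_timestamp_secs))
        .unwrap_or_default()
}

fn check_head_block(
    block_timestamp_secs: u64,
    max_head_block_age: Duration,
) -> anyhow::Result<()> {
    if block_age(block_timestamp_secs) > max_head_block_age {
        // mirrors the commit: a stale head is now an error instead of being served
        return Err(anyhow::anyhow!("head_block is too old!"));
    }
    Ok(())
}
```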

View File

@@ -49,9 +49,11 @@ pub struct StatBuffer {
     influxdb_client: Option<influxdb2::Client>,
     opt_in_timeseries_buffer: HashMap<RpcQueryKey, BufferedRpcQueryStats>,
     rpc_secret_key_cache: RpcSecretKeyCache,
-    user_balance_cache: UserBalanceCache,
     timestamp_precision: TimestampPrecision,
     tsdb_save_interval_seconds: u32,
+    user_balance_cache: UserBalanceCache,
+    _flush_sender: flume::Sender<oneshot::Sender<(usize, usize)>>,
 }

 impl StatBuffer {
@@ -67,6 +69,7 @@ impl StatBuffer {
         user_balance_cache: Option<UserBalanceCache>,
         shutdown_receiver: broadcast::Receiver<()>,
         tsdb_save_interval_seconds: u32,
+        flush_sender: flume::Sender<oneshot::Sender<(usize, usize)>>,
         flush_receiver: flume::Receiver<oneshot::Sender<(usize, usize)>>,
     ) -> anyhow::Result<Option<SpawnedStatBuffer>> {
         if influxdb_bucket.is_none() {
@@ -92,9 +95,10 @@ impl StatBuffer {
             influxdb_client,
             opt_in_timeseries_buffer: Default::default(),
             rpc_secret_key_cache: rpc_secret_key_cache.unwrap(),
-            user_balance_cache: user_balance_cache.unwrap(),
             timestamp_precision,
             tsdb_save_interval_seconds,
+            user_balance_cache: user_balance_cache.unwrap(),
+            _flush_sender: flush_sender,
         };

         // any errors inside this task will cause the application to exit

View File

@@ -72,7 +72,7 @@ impl MigrateStatsToV2SubCommand {
             None => None,
         };

-        let (_flush_sender, flush_receiver) = flume::bounded(1);
+        let (flush_sender, flush_receiver) = flume::bounded(1);

         // Spawn the stat-sender
         let emitter_spawn = StatBuffer::try_spawn(
@@ -86,6 +86,7 @@ impl MigrateStatsToV2SubCommand {
             None,
             rpc_account_shutdown_recevier,
             1,
+            flush_sender,
             flush_receiver,
         )
         .context("Error spawning stat buffer")?

View File

@@ -14,7 +14,7 @@ use std::{fs, thread};
 use tokio::select;
 use tokio::sync::{broadcast, oneshot};
 use tokio::time::{sleep_until, Instant};
-use tracing::{error, info, trace, warn};
+use tracing::{debug, error, info, trace, warn};

 /// start the main proxy daemon
 #[derive(FromArgs, PartialEq, Debug, Eq)]
@@ -42,7 +42,7 @@ impl ProxydSubCommand {
         let frontend_port = Arc::new(self.port.into());
         let prometheus_port = Arc::new(self.prometheus_port.into());

-        let (_flush_stat_buffer_sender, flush_stat_buffer_receiver) = flume::bounded(1);
+        let (flush_stat_buffer_sender, flush_stat_buffer_receiver) = flume::bounded(8);

         Self::_main(
             top_config,
@@ -51,12 +51,14 @@ impl ProxydSubCommand {
             prometheus_port,
             num_workers,
             frontend_shutdown_sender,
+            flush_stat_buffer_sender,
             flush_stat_buffer_receiver,
         )
         .await
     }

     /// this shouldn't really be pub except it makes test fixtures easier
+    #[allow(clippy::too_many_arguments)]
     pub async fn _main(
         top_config: TopConfig,
         top_config_path: Option<PathBuf>,
@@ -64,6 +66,7 @@ impl ProxydSubCommand {
         prometheus_port: Arc<AtomicU16>,
         num_workers: usize,
         frontend_shutdown_sender: broadcast::Sender<()>,
+        flush_stat_buffer_sender: flume::Sender<oneshot::Sender<(usize, usize)>>,
         flush_stat_buffer_receiver: flume::Receiver<oneshot::Sender<(usize, usize)>>,
     ) -> anyhow::Result<()> {
         // tokio has code for catching ctrl+c so we use that to shut down in most cases
@@ -85,6 +88,7 @@ impl ProxydSubCommand {
             top_config.clone(),
             num_workers,
             app_shutdown_sender.clone(),
+            flush_stat_buffer_sender,
             flush_stat_buffer_receiver,
         )
         .await?;
@@ -102,6 +106,9 @@ impl ProxydSubCommand {
             Ok(new_top_config) => match toml::from_str::<TopConfig>(&new_top_config) {
                 Ok(new_top_config) => {
                     if new_top_config != current_config {
+                        trace!("current_config: {:#?}", current_config);
+                        trace!("new_top_config: {:#?}", new_top_config);
+
                         // TODO: print the differences
                         // TODO: first run seems to always see differences. why?
                         info!("config @ {:?} changed", top_config_path);

View File

@@ -290,6 +290,7 @@ impl TestApp {
             prometheus_port_arc,
             num_workers,
             shutdown_sender.clone(),
+            flush_stat_buffer_sender.clone(),
             flush_stat_buffer_receiver,
         ))
     };