Flush stats (#161)

* create buffer if mysql OR influx is set up

* this one flush should error

* it helps to set up the db in the db migration test

* comment
commit 3bbbdd5596
parent eb7b98fdbe
Author: Bryan Stitt
Date:   2023-07-05 19:24:21 -07:00 (committed by GitHub)

7 changed files with 96 additions and 69 deletions
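
The heart of the change: the stat-flush signal used to be a broadcast::channel(1) of (), so callers could only fire-and-forget. It is now a bounded flume channel carrying a tokio oneshot::Sender<(usize, usize)>, so a caller sends a reply handle and awaits how many stats were written to the time-series db and to the relational db. Below is a minimal, self-contained sketch of that request/reply pattern. It is not code from this repository: it assumes the tokio, flume, and anyhow crates, and names like flush_tx and buffer_task are made up.

    use tokio::sync::oneshot;

    #[tokio::main]
    async fn main() -> anyhow::Result<()> {
        // the app side holds the receiver; tests hold the sender
        let (flush_tx, flush_rx) = flume::bounded::<oneshot::Sender<(usize, usize)>>(1);

        // stand-in for the StatBuffer task: answer each flush request with counts
        let buffer_task = tokio::spawn(async move {
            while let Ok(reply) = flush_rx.recv_async().await {
                let tsdb_count = 0; // pretend nothing was buffered
                let relational_count = 0;
                let _ = reply.send((tsdb_count, relational_count));
            }
        });

        // caller side, like TestApp::flush_stats(): send a reply handle, await the counts
        let (tx, rx) = oneshot::channel();
        flush_tx.send(tx)?;
        let (tsdb_count, relational_count) = rx.await?;
        println!("flushed {tsdb_count} tsdb stat(s) and {relational_count} relational stat(s)");

        // dropping the last sender disconnects the channel and lets the task exit
        drop(flush_tx);
        buffer_task.await?;

        Ok(())
    }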

View File

@@ -51,7 +51,7 @@ use std::str::FromStr;
 use std::sync::atomic::{AtomicU16, Ordering};
 use std::sync::{atomic, Arc};
 use std::time::Duration;
-use tokio::sync::{broadcast, watch, Semaphore};
+use tokio::sync::{broadcast, watch, Semaphore, oneshot};
 use tokio::task::JoinHandle;
 use tokio::time::{sleep, timeout};
 use tracing::{error, info, trace, warn, Level};
@@ -185,7 +185,7 @@ impl Web3ProxyApp {
         top_config: TopConfig,
         num_workers: usize,
         shutdown_sender: broadcast::Sender<()>,
-        flush_stat_buffer_receiver: broadcast::Receiver<()>
+        flush_stat_buffer_receiver: flume::Receiver<oneshot::Sender<(usize, usize)>>,
     ) -> anyhow::Result<Web3ProxyAppSpawn> {
         let stat_buffer_shutdown_receiver = shutdown_sender.subscribe();
         let mut background_shutdown_receiver = shutdown_sender.subscribe();
@@ -385,31 +385,27 @@ impl Web3ProxyApp {
         // create a channel for receiving stats
         // we do this in a channel so we don't slow down our response to the users
         // stats can be saved in mysql, influxdb, both, or none
-        let mut stat_sender = None;
-        if let Some(influxdb_bucket) = top_config.app.influxdb_bucket.clone() {
-            if let Some(spawned_stat_buffer) = StatBuffer::try_spawn(
-                BILLING_PERIOD_SECONDS,
-                influxdb_bucket,
-                top_config.app.chain_id,
-                db_conn.clone(),
-                60,
-                influxdb_client.clone(),
-                Some(rpc_secret_key_cache.clone()),
-                Some(user_balance_cache.clone()),
-                stat_buffer_shutdown_receiver,
-                1,
-                flush_stat_buffer_receiver,
-            )? {
-                // since the database entries are used for accounting, we want to be sure everything is saved before exiting
-                important_background_handles.push(spawned_stat_buffer.background_handle);
-
-                stat_sender = Some(spawned_stat_buffer.stat_sender);
-            }
-        }
-
-        if stat_sender.is_none() {
-            info!("stats will not be collected");
-        }
+        let stat_sender = if let Some(spawned_stat_buffer) = StatBuffer::try_spawn(
+            BILLING_PERIOD_SECONDS,
+            top_config.app.chain_id,
+            db_conn.clone(),
+            60,
+            top_config.app.influxdb_bucket.clone(),
+            influxdb_client.clone(),
+            Some(rpc_secret_key_cache.clone()),
+            Some(user_balance_cache.clone()),
+            stat_buffer_shutdown_receiver,
+            1,
+            flush_stat_buffer_receiver,
+        )? {
+            // since the database entries are used for accounting, we want to be sure everything is saved before exiting
+            important_background_handles.push(spawned_stat_buffer.background_handle);
+
+            Some(spawned_stat_buffer.stat_sender)
+        } else {
+            info!("stats will not be collected");
+
+            None
+        };
 
         // make a http shared client
         // TODO: can we configure the connection pool? should we?

View File

@@ -9,9 +9,9 @@ use influxdb2::api::write::TimestampPrecision;
 use migration::sea_orm::prelude::Decimal;
 use migration::sea_orm::DatabaseConnection;
 use std::time::Duration;
-use tokio::sync::broadcast;
+use tokio::sync::{broadcast, oneshot};
 use tokio::time::{interval, sleep};
-use tracing::{error, info, trace};
+use tracing::{error, info, trace, warn};
 
 #[derive(Debug, Default)]
 pub struct BufferedRpcQueryStats {
@@ -43,6 +43,7 @@ pub struct StatBuffer {
     db_conn: Option<DatabaseConnection>,
     db_save_interval_seconds: u32,
     global_timeseries_buffer: HashMap<RpcQueryKey, BufferedRpcQueryStats>,
+    influxdb_bucket: Option<String>,
     influxdb_client: Option<influxdb2::Client>,
     opt_in_timeseries_buffer: HashMap<RpcQueryKey, BufferedRpcQueryStats>,
     rpc_secret_key_cache: RpcSecretKeyCache,
@@ -55,17 +56,21 @@ impl StatBuffer {
     #[allow(clippy::too_many_arguments)]
     pub fn try_spawn(
         billing_period_seconds: i64,
-        bucket: String,
         chain_id: u64,
         db_conn: Option<DatabaseConnection>,
         db_save_interval_seconds: u32,
-        influxdb_client: Option<influxdb2::Client>,
+        influxdb_bucket: Option<String>,
+        mut influxdb_client: Option<influxdb2::Client>,
         rpc_secret_key_cache: Option<RpcSecretKeyCache>,
         user_balance_cache: Option<UserBalanceCache>,
         shutdown_receiver: broadcast::Receiver<()>,
         tsdb_save_interval_seconds: u32,
-        flush_receiver: broadcast::Receiver<()>,
+        flush_receiver: flume::Receiver<oneshot::Sender<(usize, usize)>>,
     ) -> anyhow::Result<Option<SpawnedStatBuffer>> {
+        if influxdb_bucket.is_none() {
+            influxdb_client = None;
+        }
+
         if db_conn.is_none() && influxdb_client.is_none() {
             return Ok(None);
         }
@@ -73,6 +78,7 @@ impl StatBuffer {
         let (stat_sender, stat_receiver) = flume::unbounded();
 
         let timestamp_precision = TimestampPrecision::Seconds;
 
         let mut new = Self {
             accounting_db_buffer: Default::default(),
             billing_period_seconds,
@@ -80,6 +86,7 @@ impl StatBuffer {
             db_conn,
             db_save_interval_seconds,
             global_timeseries_buffer: Default::default(),
+            influxdb_bucket,
             influxdb_client,
             opt_in_timeseries_buffer: Default::default(),
             rpc_secret_key_cache: rpc_secret_key_cache.unwrap(),
@@ -89,8 +96,9 @@ impl StatBuffer {
         };
 
         // any errors inside this task will cause the application to exit
+        // TODO? change this to the X and XTask pattern like the latency crate uses
         let handle = tokio::spawn(async move {
-            new.aggregate_and_save_loop(bucket, stat_receiver, shutdown_receiver, flush_receiver)
+            new.aggregate_and_save_loop(stat_receiver, shutdown_receiver, flush_receiver)
                 .await
         });
@@ -99,10 +107,9 @@ impl StatBuffer {
     async fn aggregate_and_save_loop(
         &mut self,
-        bucket: String,
         stat_receiver: flume::Receiver<AppStat>,
         mut shutdown_receiver: broadcast::Receiver<()>,
-        mut flush_receiver: broadcast::Receiver<()>,
+        flush_receiver: flume::Receiver<oneshot::Sender<(usize, usize)>>,
     ) -> Web3ProxyResult<()> {
         let mut tsdb_save_interval =
             interval(Duration::from_secs(self.tsdb_save_interval_seconds as u64));
@@ -147,22 +154,30 @@ impl StatBuffer {
                 }
                 _ = tsdb_save_interval.tick() => {
                     trace!("TSDB save internal tick");
-                    let count = self.save_tsdb_stats(&bucket).await;
+                    let count = self.save_tsdb_stats().await;
                     if count > 0 {
                         trace!("Saved {} stats to the tsdb", count);
                     }
                 }
-                _ = flush_receiver.recv() => {
-                    trace!("flush");
-
-                    let count = self.save_tsdb_stats(&bucket).await;
-                    if count > 0 {
-                        trace!("Flushed {} stats to the tsdb", count);
-                    }
-
-                    let count = self.save_relational_stats().await;
-                    if count > 0 {
-                        trace!("Flushed {} stats to the relational db", count);
+                x = flush_receiver.recv_async() => {
+                    if let Ok(x) = x {
+                        trace!("flush");
+
+                        let tsdb_count = self.save_tsdb_stats().await;
+                        if tsdb_count > 0 {
+                            trace!("Flushed {} stats to the tsdb", tsdb_count);
+                        }
+
+                        let relational_count = self.save_relational_stats().await;
+                        if relational_count > 0 {
+                            trace!("Flushed {} stats to the relational db", relational_count);
+                        }
+
+                        if let Err(err) = x.send((tsdb_count, relational_count)) {
+                            warn!(%tsdb_count, %relational_count, ?err, "unable to notify about flushed stats");
+                        }
+                    } else {
+                        unimplemented!()
                     }
                 }
                 x = shutdown_receiver.recv() => {
@@ -197,7 +212,7 @@ impl StatBuffer {
         info!("saved {} pending relational stat(s)", saved_relational);
 
-        let saved_tsdb = self.save_tsdb_stats(&bucket).await;
+        let saved_tsdb = self.save_tsdb_stats().await;
 
         info!("saved {} pending tsdb stat(s)", saved_tsdb);
@@ -233,10 +248,15 @@ impl StatBuffer {
     }
 
     // TODO: bucket should be an enum so that we don't risk typos
-    async fn save_tsdb_stats(&mut self, bucket: &str) -> usize {
+    async fn save_tsdb_stats(&mut self) -> usize {
         let mut count = 0;
 
         if let Some(influxdb_client) = self.influxdb_client.as_ref() {
+            let influxdb_bucket = self
+                .influxdb_bucket
+                .as_ref()
+                .expect("if client is set, bucket must be set");
+
             // TODO: use stream::iter properly to avoid allocating this Vec
             let mut points = vec![];
@@ -290,7 +310,7 @@ impl StatBuffer {
             if let Err(err) = influxdb_client
                 .write_with_precision(
-                    bucket,
+                    influxdb_bucket,
                     stream::iter(points),
                     self.timestamp_precision,
                 )
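
The new gating above is what the first commit bullet means by "create buffer if mysql OR influx is set up": if no influxdb_bucket is configured the influx client is discarded, and try_spawn only returns Ok(None) (no stats collected) when the relational db connection and the influx client are both absent. A rough standalone restatement of that decision, using a hypothetical Config struct with String stand-ins for the real connection and client types:

    // hypothetical, simplified stand-in for the separate arguments try_spawn takes
    struct Config {
        db_conn: Option<String>,         // stands in for Option<DatabaseConnection>
        influxdb_bucket: Option<String>,
        influxdb_client: Option<String>, // stands in for Option<influxdb2::Client>
    }

    /// true when a StatBuffer should be spawned
    fn should_spawn(mut cfg: Config) -> bool {
        // a client without a bucket is unusable, so treat it as absent
        if cfg.influxdb_bucket.is_none() {
            cfg.influxdb_client = None;
        }

        // spawn if mysql OR influx is usable; otherwise stats are not collected
        cfg.db_conn.is_some() || cfg.influxdb_client.is_some()
    }

    fn main() {
        // mysql alone is enough
        assert!(should_spawn(Config {
            db_conn: Some("mysql".into()),
            influxdb_bucket: None,
            influxdb_client: None,
        }));

        // an influx client without a bucket is not
        assert!(!should_spawn(Config {
            db_conn: None,
            influxdb_bucket: None,
            influxdb_client: Some("influx".into()),
        }));
    }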

View File

@@ -72,19 +72,15 @@ impl MigrateStatsToV2SubCommand {
             None => None,
         };
 
-        let (_flush_sender, flush_receiver) = broadcast::channel(1);
+        let (_flush_sender, flush_receiver) = flume::bounded(1);
 
         // Spawn the stat-sender
         let emitter_spawn = StatBuffer::try_spawn(
             BILLING_PERIOD_SECONDS,
-            top_config
-                .app
-                .influxdb_bucket
-                .clone()
-                .context("No influxdb bucket was provided")?,
             top_config.app.chain_id,
             Some(db_conn.clone()),
             30,
+            top_config.app.influxdb_bucket.clone(),
             influxdb_client.clone(),
             None,
             None,

View File

@@ -12,7 +12,7 @@ use std::sync::Arc;
 use std::time::Duration;
 use std::{fs, thread};
 use tokio::select;
-use tokio::sync::broadcast;
+use tokio::sync::{broadcast, oneshot};
 use tokio::time::{sleep_until, Instant};
 use tracing::{error, info, trace, warn};
@@ -37,12 +37,12 @@ impl ProxydSubCommand {
         top_config_path: PathBuf,
         num_workers: usize,
     ) -> anyhow::Result<()> {
-        let (shutdown_sender, _) = broadcast::channel(1);
+        let (frontend_shutdown_sender, _) = broadcast::channel(1);
         // TODO: i think there is a small race. if config_path changes
 
         let frontend_port = Arc::new(self.port.into());
         let prometheus_port = Arc::new(self.prometheus_port.into());
 
-        let (flush_stat_buffer_sender, _) = broadcast::channel(1);
+        let (_flush_stat_buffer_sender, flush_stat_buffer_receiver) = flume::bounded(1);
 
         Self::_main(
             top_config,
@@ -50,8 +50,8 @@ impl ProxydSubCommand {
             frontend_port,
             prometheus_port,
             num_workers,
-            shutdown_sender,
-            flush_stat_buffer_sender,
+            frontend_shutdown_sender,
+            flush_stat_buffer_receiver,
         )
         .await
     }
@@ -64,7 +64,7 @@ impl ProxydSubCommand {
         prometheus_port: Arc<AtomicU16>,
         num_workers: usize,
         frontend_shutdown_sender: broadcast::Sender<()>,
-        flush_stat_buffer_sender: broadcast::Sender<()>,
+        flush_stat_buffer_receiver: flume::Receiver<oneshot::Sender<(usize, usize)>>,
     ) -> anyhow::Result<()> {
         // tokio has code for catching ctrl+c so we use that to shut down in most cases
         // frontend_shutdown_sender is currently only used in tests, but we might make a /shutdown endpoint or something
@@ -85,7 +85,7 @@ impl ProxydSubCommand {
             top_config.clone(),
             num_workers,
             app_shutdown_sender.clone(),
-            flush_stat_buffer_sender.subscribe(),
+            flush_stat_buffer_receiver,
         )
         .await?;

View File

@@ -20,7 +20,10 @@ use std::{sync::Arc, time::Duration};
 use tokio::{
     net::TcpStream,
     process::Command as AsyncCommand,
-    sync::broadcast::{self, error::SendError},
+    sync::{
+        broadcast::{self, error::SendError},
+        oneshot,
+    },
     task::JoinHandle,
     time::{sleep, Instant},
 };
@@ -55,7 +58,7 @@ pub struct TestApp {
     pub proxy_provider: Provider<Http>,
 
     /// tell the app to flush stats to the database
-    flush_stat_buffer_sender: broadcast::Sender<()>,
+    flush_stat_buffer_sender: flume::Sender<oneshot::Sender<(usize, usize)>>,
 
     /// tell the app to shut down (use `self.stop()`).
     shutdown_sender: broadcast::Sender<()>,
@@ -272,7 +275,7 @@ impl TestApp {
         let frontend_port_arc = Arc::new(AtomicU16::new(0));
         let prometheus_port_arc = Arc::new(AtomicU16::new(0));
 
-        let (flush_stat_buffer_sender, _flush_stat_buffer_receiver) = broadcast::channel(1);
+        let (flush_stat_buffer_sender, flush_stat_buffer_receiver) = flume::bounded(1);
 
         // spawn the app
         // TODO: spawn in a thread so we can run from non-async tests and so the Drop impl can wait for it to stop
@@ -284,7 +287,7 @@ impl TestApp {
                 prometheus_port_arc,
                 num_workers,
                 shutdown_sender.clone(),
-                flush_stat_buffer_sender.clone(),
+                flush_stat_buffer_receiver,
             ))
         };
@@ -320,8 +323,15 @@ impl TestApp {
         self.db.as_ref().unwrap().conn.as_ref().unwrap()
     }
 
-    pub fn flush_stats(&self) {
-        self.flush_stat_buffer_sender.send(()).unwrap();
+    #[allow(unused)]
+    pub async fn flush_stats(&self) -> anyhow::Result<(usize, usize)> {
+        let (tx, rx) = oneshot::channel();
+
+        self.flush_stat_buffer_sender.send(tx)?;
+
+        let x = rx.await?;
+
+        Ok(x)
     }
 
     pub fn stop(&self) -> Result<usize, SendError<()>> {

View File

@@ -1,3 +1,3 @@
-mod app;
+pub mod app;
 
 pub use self::app::TestApp;

View File

@@ -10,11 +10,13 @@ use tokio::{
 };
 use web3_proxy::rpcs::blockchain::ArcBlock;
 
-// #[cfg_attr(not(feature = "tests-needing-docker"), ignore)]
-#[ignore = "under construction"]
+#[cfg_attr(not(feature = "tests-needing-docker"), ignore)]
 #[test_log::test(tokio::test)]
 async fn it_migrates_the_db() {
-    TestApp::spawn(true).await;
+    let x = TestApp::spawn(true).await;
+
+    // we call flush stats more to be sure it works than because we expect it to save any stats
+    x.flush_stats().await.unwrap();
 }
 
 #[test_log::test(tokio::test)]
@@ -89,6 +91,9 @@ async fn it_starts_and_stops() {
     assert_eq!(anvil_result, proxy_result.unwrap());
 
+    // this won't do anything since stats aren't tracked when there isn't a db
+    x.flush_stats().await.unwrap_err();
+
     // most tests won't need to wait, but we should wait here to be sure all the shutdown logic works properly
     x.wait().await;
 }
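
The unwrap_err above is the "this one flush should error" bullet from the commit message: that test runs without a database, so with no usable stats backend StatBuffer::try_spawn returns Ok(None) and the flume flush receiver is dropped with it; the sender then fails as soon as flush_stats() tries to use it. A tiny sketch of that failure mode, again with made-up names and assuming the flume and tokio crates:

    use tokio::sync::oneshot;

    fn main() {
        // when no stat buffer is spawned, the receiver half is dropped...
        let (flush_tx, flush_rx) = flume::bounded::<oneshot::Sender<(usize, usize)>>(1);
        drop(flush_rx);

        // ...so sending the reply handle errors, which flush_stats() surfaces to the test
        let (tx, _rx) = oneshot::channel();
        assert!(flush_tx.send(tx).is_err());
    }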