improve flushing and buffering of stats

This commit is contained in:
Bryan Stitt 2023-07-19 22:32:52 -07:00
parent 90ffb5254a
commit ac2d9049be
7 changed files with 59 additions and 56 deletions

@ -320,7 +320,7 @@ impl Web3ProxyApp {
.build() .build()
.into(); .into();
// Generate the instance name (hostname + random hash) // Generate the instance name
let instance_hash = Ulid::new().to_string(); let instance_hash = Ulid::new().to_string();
// create a channel for receiving stats // create a channel for receiving stats

@ -26,6 +26,7 @@ use serde::Serialize;
use serde_json::value::RawValue; use serde_json::value::RawValue;
use siwe::VerificationError; use siwe::VerificationError;
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration;
use std::{borrow::Cow, net::IpAddr}; use std::{borrow::Cow, net::IpAddr};
use tokio::{sync::AcquireError, task::JoinError, time::Instant}; use tokio::{sync::AcquireError, task::JoinError, time::Instant};
use tracing::{debug, error, trace, warn}; use tracing::{debug, error, trace, warn};
@ -148,7 +149,7 @@ pub enum Web3ProxyError {
/// TODO: what should be attached to the timeout? /// TODO: what should be attached to the timeout?
#[display(fmt = "{:?}", _0)] #[display(fmt = "{:?}", _0)]
#[error(ignore)] #[error(ignore)]
Timeout(Option<tokio::time::error::Elapsed>), Timeout(Option<Duration>),
UlidDecode(ulid::DecodeError), UlidDecode(ulid::DecodeError),
#[error(ignore)] #[error(ignore)]
UnknownBlockHash(H256), UnknownBlockHash(H256),
@ -1128,8 +1129,8 @@ impl From<ethers::types::ParseBytesError> for Web3ProxyError {
} }
impl From<tokio::time::error::Elapsed> for Web3ProxyError { impl From<tokio::time::error::Elapsed> for Web3ProxyError {
fn from(err: tokio::time::error::Elapsed) -> Self { fn from(_: tokio::time::error::Elapsed) -> Self {
Self::Timeout(Some(err)) Self::Timeout(None)
} }
} }

@ -10,8 +10,8 @@ pub mod rpc_proxy_ws;
pub mod status; pub mod status;
pub mod users; pub mod users;
use crate::app::Web3ProxyApp;
use crate::errors::Web3ProxyResult; use crate::errors::Web3ProxyResult;
use crate::{app::Web3ProxyApp, errors::Web3ProxyError};
use axum::{ use axum::{
error_handling::HandleErrorLayer, error_handling::HandleErrorLayer,
routing::{get, post}, routing::{get, post},
@ -269,9 +269,8 @@ pub async fn serve(
ServiceBuilder::new() ServiceBuilder::new()
// this middleware goes above `TimeoutLayer` because it will receive // this middleware goes above `TimeoutLayer` because it will receive
// errors returned by `TimeoutLayer` // errors returned by `TimeoutLayer`
// TODO: JsonRPC error response
.layer(HandleErrorLayer::new(|_: BoxError| async { .layer(HandleErrorLayer::new(|_: BoxError| async {
StatusCode::REQUEST_TIMEOUT Web3ProxyError::Timeout(Some(Duration::from_secs(5 * 60)))
})) }))
.layer(TimeoutLayer::new(Duration::from_secs(5 * 60))), .layer(TimeoutLayer::new(Duration::from_secs(5 * 60))),
) )

@ -568,7 +568,6 @@ pub async fn query_user_influx_stats<'a>(
} }
}); });
// datapoints.insert(out.get("time"), out);
json!(out) json!(out)
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();

@ -479,37 +479,29 @@ impl BufferedRpcQueryStats {
measurement: &str, measurement: &str,
chain_id: u64, chain_id: u64,
key: RpcQueryKey, key: RpcQueryKey,
instance: &String, instance: &str,
) -> anyhow::Result<DataPoint> { ) -> anyhow::Result<DataPoint> {
let mut builder = DataPoint::builder(measurement); let mut builder = DataPoint::builder(measurement)
builder = builder.tag("chain_id", chain_id.to_string());
if let Some(rpc_secret_key_id) = key.rpc_secret_key_id {
builder = builder.tag("rpc_secret_key_id", rpc_secret_key_id.to_string());
}
builder = builder.tag("instance", instance);
builder = builder.tag("method", key.method);
builder = builder
.tag("archive_needed", key.archive_needed.to_string()) .tag("archive_needed", key.archive_needed.to_string())
.tag("chain_id", chain_id.to_string())
.tag("error_response", key.error_response.to_string()) .tag("error_response", key.error_response.to_string())
.tag("instance", instance)
.tag("method", key.method)
.tag("user_error_response", key.user_error_response.to_string()) .tag("user_error_response", key.user_error_response.to_string())
.field("frontend_requests", self.frontend_requests as i64) .timestamp(key.response_timestamp)
.field("backend_requests", self.backend_requests as i64) .field("backend_requests", self.backend_requests as i64)
.field("no_servers", self.no_servers as i64)
.field("cache_misses", self.cache_misses as i64)
.field("cache_hits", self.cache_hits as i64) .field("cache_hits", self.cache_hits as i64)
.field("cache_misses", self.cache_misses as i64)
.field("frontend_requests", self.frontend_requests as i64)
.field("no_servers", self.no_servers as i64)
.field("sum_request_bytes", self.sum_request_bytes as i64) .field("sum_request_bytes", self.sum_request_bytes as i64)
.field("sum_response_millis", self.sum_response_millis as i64)
.field("sum_response_bytes", self.sum_response_bytes as i64) .field("sum_response_bytes", self.sum_response_bytes as i64)
.field("sum_response_millis", self.sum_response_millis as i64)
.field( .field(
"sum_incl_free_credits_used", "balance",
self.sum_credits_used self.approximate_balance_remaining
.to_f64() .to_f64()
.context("sum_credits_used is really (too) large")?, .context("balance is really (too) large")?,
) )
.field( .field(
"sum_credits_used", "sum_credits_used",
@ -518,13 +510,16 @@ impl BufferedRpcQueryStats {
.context("sum_credits_used is really (too) large")?, .context("sum_credits_used is really (too) large")?,
) )
.field( .field(
"balance", "sum_incl_free_credits_used",
self.approximate_balance_remaining self.sum_credits_used
.to_f64() .to_f64()
.context("balance is really (too) large")?, .context("sum_credits_used is really (too) large")?,
); );
builder = builder.timestamp(key.response_timestamp); // TODO: set the rpc_secret_key_id tag to 0 when anon? will that make other queries easier?
if let Some(rpc_secret_key_id) = key.rpc_secret_key_id {
builder = builder.tag("rpc_secret_key_id", rpc_secret_key_id.to_string());
}
let point = builder.build()?; let point = builder.build()?;

@ -50,6 +50,7 @@ pub struct StatBuffer {
global_timeseries_buffer: HashMap<RpcQueryKey, BufferedRpcQueryStats>, global_timeseries_buffer: HashMap<RpcQueryKey, BufferedRpcQueryStats>,
influxdb_bucket: Option<String>, influxdb_bucket: Option<String>,
influxdb_client: Option<influxdb2::Client>, influxdb_client: Option<influxdb2::Client>,
instance_hash: String,
opt_in_timeseries_buffer: HashMap<RpcQueryKey, BufferedRpcQueryStats>, opt_in_timeseries_buffer: HashMap<RpcQueryKey, BufferedRpcQueryStats>,
rpc_secret_key_cache: RpcSecretKeyCache, rpc_secret_key_cache: RpcSecretKeyCache,
timestamp_precision: TimestampPrecision, timestamp_precision: TimestampPrecision,
@ -57,7 +58,6 @@ pub struct StatBuffer {
user_balance_cache: UserBalanceCache, user_balance_cache: UserBalanceCache,
_flush_sender: mpsc::Sender<oneshot::Sender<FlushedStats>>, _flush_sender: mpsc::Sender<oneshot::Sender<FlushedStats>>,
instance_hash: String,
} }
impl StatBuffer { impl StatBuffer {
@ -92,13 +92,14 @@ impl StatBuffer {
global_timeseries_buffer: Default::default(), global_timeseries_buffer: Default::default(),
influxdb_bucket, influxdb_bucket,
influxdb_client, influxdb_client,
instance_hash,
opt_in_timeseries_buffer: Default::default(), opt_in_timeseries_buffer: Default::default(),
rpc_secret_key_cache: rpc_secret_key_cache.unwrap(), rpc_secret_key_cache: rpc_secret_key_cache.unwrap(),
timestamp_precision, timestamp_precision,
tsdb_save_interval_seconds, tsdb_save_interval_seconds,
user_balance_cache: user_balance_cache.unwrap(), user_balance_cache: user_balance_cache.unwrap(),
_flush_sender: flush_sender, _flush_sender: flush_sender,
instance_hash,
}; };
// any errors inside this task will cause the application to exit // any errors inside this task will cause the application to exit
@ -149,20 +150,8 @@ impl StatBuffer {
x = flush_receiver.recv() => { x = flush_receiver.recv() => {
match x { match x {
Some(x) => { Some(x) => {
trace!("flush"); let flushed_stats = self._flush(&mut stat_receiver).await?;
// fill the buffer
while let Ok(stat) = stat_receiver.try_recv() {
self._buffer_app_stat(stat).await?;
}
// flush the buffers
let tsdb_count = self.save_tsdb_stats().await;
let relational_count = self.save_relational_stats().await;
// notify
let flushed_stats = FlushedStats{ timeseries: tsdb_count, relational: relational_count};
trace!(?flushed_stats);
if let Err(err) = x.send(flushed_stats) { if let Err(err) = x.send(flushed_stats) {
error!(?flushed_stats, ?err, "unable to notify about flushed stats"); error!(?flushed_stats, ?err, "unable to notify about flushed stats");
} }
@ -201,13 +190,7 @@ impl StatBuffer {
// sleep(Duration::from_millis(10)).await; // sleep(Duration::from_millis(10)).await;
// } // }
let saved_relational = self.save_relational_stats().await; self._flush(&mut stat_receiver).await?;
info!("saved {} pending relational stat(s)", saved_relational);
let saved_tsdb = self.save_tsdb_stats().await;
info!("saved {} pending tsdb stat(s)", saved_tsdb);
info!("accounting and stat save loop complete"); info!("accounting and stat save loop complete");
@ -320,6 +303,32 @@ impl StatBuffer {
Ok(()) Ok(())
} }
/// Buffer every stat that is already queued, then save both stat buffers.
///
/// Stats are pulled from `stat_receiver` with `try_recv` until it returns an
/// error, so this never waits for new stats to arrive. Each one is handed to
/// `_buffer_app_stat`. Afterwards the timeseries buffer is saved first and the
/// relational buffer second.
///
/// # Errors
///
/// Returns early with the error from `_buffer_app_stat` if buffering a stat
/// fails; in that case neither buffer is saved by this call.
///
/// # Returns
///
/// The counts returned by `save_tsdb_stats` and `save_relational_stats`,
/// wrapped in a `FlushedStats`.
async fn _flush(
    &mut self,
    stat_receiver: &mut mpsc::UnboundedReceiver<AppStat>,
) -> Web3ProxyResult<FlushedStats> {
    trace!("flush");

    // move everything that is waiting in the channel into the buffers.
    // any `try_recv` error ends the drain.
    loop {
        match stat_receiver.try_recv() {
            Ok(pending) => {
                self._buffer_app_stat(pending).await?;
            }
            Err(_) => break,
        }
    }

    // save the buffers: timeseries first, then relational
    let timeseries = self.save_tsdb_stats().await;
    let relational = self.save_relational_stats().await;

    // report how many of each kind were saved
    let flushed_stats = FlushedStats {
        timeseries,
        relational,
    };
    trace!(?flushed_stats);

    Ok(flushed_stats)
}
async fn save_relational_stats(&mut self) -> usize { async fn save_relational_stats(&mut self) -> usize {
let mut count = 0; let mut count = 0;

@ -131,7 +131,7 @@ async fn test_multiple_proxies_stats_add_up() {
assert_eq!(flush_1_count_0.relational, 1); assert_eq!(flush_1_count_0.relational, 1);
assert_eq!(flush_1_count_0.timeseries, 2); assert_eq!(flush_1_count_0.timeseries, 2);
// // no more stats should arrive // no more stats should arrive
let flush_0_count_1 = x_0.flush_stats().await.unwrap(); let flush_0_count_1 = x_0.flush_stats().await.unwrap();
let flush_1_count_1 = x_1.flush_stats().await.unwrap(); let flush_1_count_1 = x_1.flush_stats().await.unwrap();
info!("Counts 0 are: {:?}", flush_0_count_1); info!("Counts 0 are: {:?}", flush_0_count_1);