move floats and durations around for latency tracking
parent 2f0e6103ac
commit 825ba006f1
TODO.md
```diff
@@ -430,7 +430,7 @@ These are not yet ordered. There might be duplicates. We might not actually need
 - [ ] cli for adding rpc keys to an existing user
 - [ ] rename "private" to "mev protected" to avoid confusion about private transactions being public once they are mined
 - [ ] allow restricting an rpc key to specific chains
-- [-] writes to request_latency should be handled by a background task so they don't slow down the request
+- [-] writes to median_request_latency should be handled by a background task so they don't slow down the request
 - [ ] keep re-broadcasting transactions until they are confirmed
 - [ ] if mev protection is disabled, we should send to *both* balanced_rpcs *and* private_rpcs
 - [x] if mev protection is enabled, we should send to *only* private_rpcs
```
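The in-progress TODO above describes the classic channel-plus-worker pattern: the request path does a cheap, non-blocking send, and a spawned task applies the sample off the hot path. A minimal sketch of that pattern, assuming tokio; `MedianTracker`, `LatencyHandle`, and the other names are illustrative stand-ins, not the project's actual types:

```rust
use std::time::Duration;
use tokio::sync::mpsc;

/// Illustrative stand-in for a rolling-quantile tracker's internals.
struct MedianTracker {
    samples: Vec<f32>, // a real implementation would use a rolling quantile estimator
}

impl MedianTracker {
    fn record_secs(&mut self, secs: f32) {
        self.samples.push(secs);
    }
}

/// Handle held by the request path. `record` never blocks the request:
/// it only pushes the sample onto a channel.
#[derive(Clone)]
struct LatencyHandle {
    tx: mpsc::UnboundedSender<Duration>,
}

impl LatencyHandle {
    fn record(&self, latency: Duration) {
        // if the background task is gone, silently drop the sample
        let _ = self.tx.send(latency);
    }
}

/// Spawn the background writer and return the cheap handle.
/// Must be called from within a tokio runtime.
fn spawn_latency_writer() -> LatencyHandle {
    let (tx, mut rx) = mpsc::unbounded_channel::<Duration>();

    tokio::spawn(async move {
        let mut tracker = MedianTracker { samples: Vec::new() };
        // the slow work (quantile updates) happens off the request path
        while let Some(latency) = rx.recv().await {
            tracker.record_secs(latency.as_secs_f32());
        }
    });

    LatencyHandle { tx }
}
```

The `RollingQuantileLatency::spawn_median` calls in the hunks below suggest the project already takes this shape: construction spawns the worker, and the handle's `record` is the cheap send.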
```diff
@@ -40,7 +40,7 @@ impl EwmaLatency {
 
     /// Current EWMA value in seconds
     #[inline]
-    pub fn duration(&self) -> Duration {
+    pub fn latency(&self) -> Duration {
        let x = self.seconds.get();

        Duration::from_secs_f32(x)
```
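For context on `EwmaLatency`: an exponentially weighted moving average stores a single float and folds each new sample in with a fixed smoothing factor, so reads and writes are both O(1). A minimal sketch of the general EWMA update, with `alpha` as an assumed smoothing factor rather than the crate's actual tuning:

```rust
use std::time::Duration;

/// Generic EWMA over latency samples, stored in seconds as an f32.
struct Ewma {
    seconds: f32,
    alpha: f32, // weight of each new sample, 0 < alpha <= 1
}

impl Ewma {
    fn record_secs(&mut self, sample: f32) {
        // classic EWMA update: new = alpha * sample + (1 - alpha) * old
        self.seconds = self.alpha * sample + (1.0 - self.alpha) * self.seconds;
    }

    /// Current EWMA value as a Duration, matching the renamed `latency()` getter.
    fn latency(&self) -> Duration {
        Duration::from_secs_f32(self.seconds)
    }
}
```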
```diff
@@ -69,7 +69,7 @@ impl RollingQuantileLatency {
 
     /// Current median.
     #[inline]
-    pub fn duration(&self) -> Duration {
+    pub fn latency(&self) -> Duration {
        Duration::from_secs_f32(self.seconds())
    }
 }
```
```diff
@@ -495,9 +495,11 @@ impl Web3ProxyApp {
         // create semaphores for concurrent connection limits
-        // TODO: how can we implement time til idle?
-        let bearer_token_semaphores = Cache::new(max_users);
-        let ip_semaphores = Cache::new(max_users);
-        let user_semaphores = Cache::new(max_users);
+        // TODO: what should tti be for semaphores?
+        let bearer_token_semaphores = CacheBuilder::new(max_users)
+            .name("bearer_token_semaphores")
+            .build();
+        let ip_semaphores = CacheBuilder::new(max_users).name("ip_semaphores").build();
+        let user_semaphores = CacheBuilder::new(max_users).name("user_semaphores").build();
 
         let chain_id = top_config.app.chain_id;
```
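The replacement TODO asks what time-to-idle (tti) the semaphore caches should use. Assuming a moka-style builder, which this `CacheBuilder` resembles, tti is one more builder call; the cache type, the 5-minute value, and the function below are hypothetical placeholders, not project code:

```rust
use std::time::Duration;
use moka::future::Cache;

fn build_user_semaphores(max_users: u64) -> Cache<u64, ()> {
    // Hypothetical: the same named-cache construction, plus a time-to-idle so
    // entries for idle users are evicted instead of living forever.
    Cache::builder()
        .max_capacity(max_users)
        .name("user_semaphores")
        .time_to_idle(Duration::from_secs(5 * 60)) // placeholder tti, not a project decision
        .build()
}
```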
```diff
@@ -24,7 +24,7 @@ struct BackendRpcData<'a> {
     active_requests: u64,
     internal_requests: u64,
     external_requests: u64,
-    head_latency_ms: f64,
+    head_delay_ms: f64,
     peak_latency_ms: f64,
     weighted_latency_ms: f64,
 }
```
```diff
@@ -95,7 +95,7 @@ impl PopularityContestSubCommand {
 
             highest_block = highest_block.max(head_block);
 
-            let head_latency_ms = conn.get("head_latency_ms").unwrap().as_f64().unwrap();
+            let head_delay_ms = conn.get("head_delay_ms").unwrap().as_f64().unwrap();
 
             let peak_latency_ms = conn
                 .get("peak_latency_ms")
```
```diff
@@ -116,7 +116,7 @@ impl PopularityContestSubCommand {
             internal_requests,
             external_requests,
             head_block,
-            head_latency_ms,
+            head_delay_ms,
             peak_latency_ms,
             weighted_latency_ms,
         };
```
```diff
@@ -178,7 +178,7 @@ impl PopularityContestSubCommand {
                 rpc.active_requests,
                 lag,
                 block_data_limit,
-                format!("{:.3}", rpc.head_latency_ms),
+                format!("{:.3}", rpc.head_delay_ms),
                 rpc.peak_latency_ms,
                 format!("{:.3}", rpc.weighted_latency_ms),
                 tier,
```
```diff
@@ -342,7 +342,7 @@ pub struct ConsensusFinder {
     max_head_block_age: Option<Duration>,
     /// tier 0 will be preferred as long as the distance between it and the other tiers is <= max_tier_lag
     max_head_block_lag: Option<U64>,
-    /// Block Hash -> First Seen Instant. used to track rpc.head_latency. The same cache should be shared between all ConnectionsGroups
+    /// Block Hash -> First Seen Instant. used to track rpc.head_delay. The same cache should be shared between all ConnectionsGroups
     first_seen: FirstSeenCache,
 }
 
```
```diff
@@ -383,9 +383,7 @@ impl ConsensusFinder {
         let latency = first_seen.elapsed();
 
         // record the time behind the fastest node
-        rpc.head_latency_ms
-            .write()
-            .record_secs(latency.as_secs_f32());
+        rpc.head_delay.write().record_secs(latency.as_secs_f32());
 
         // update the local mapping of rpc -> block
         self.rpc_heads.insert(rpc, block)
```
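The shared `first_seen` cache (Block Hash -> first-seen `Instant`, per the struct doc above) is what makes the renamed `head_delay` meaningful: the first RPC to announce a hash stamps it, and every later announcement of the same hash records its `elapsed()` as lag behind the fastest node. A minimal sketch of the pattern, using a plain `HashMap` where the project shares one cache across all connection groups:

```rust
use std::collections::HashMap;
use std::time::{Duration, Instant};

/// Block hash -> when any RPC first announced it.
struct FirstSeen {
    inner: HashMap<[u8; 32], Instant>,
}

impl FirstSeen {
    /// Returns how far behind the first announcer this announcement is.
    /// The fastest RPC inserts the entry and records a delay of ~zero.
    fn delay_for(&mut self, block_hash: [u8; 32]) -> Duration {
        let first_seen = *self
            .inner
            .entry(block_hash)
            .or_insert_with(Instant::now);
        first_seen.elapsed()
    }
}
```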
```diff
@@ -450,7 +448,7 @@
         let mut median_latencies_sec = HashMap::new();
         for rpc in self.rpc_heads.keys() {
             let median_latency_sec = rpc
-                .request_latency
+                .median_latency
                 .as_ref()
                 .map(|x| x.seconds())
                 .unwrap_or_default();
```
|
@ -460,7 +460,7 @@ impl Web3Rpcs {
|
||||
trace!("{} vs {}", rpc_a, rpc_b);
|
||||
// TODO: cached key to save a read lock
|
||||
// TODO: ties to the server with the smallest block_data_limit
|
||||
let faster_rpc = min_by_key(rpc_a, rpc_b, |x| x.weighted_peak_ewma_seconds());
|
||||
let faster_rpc = min_by_key(rpc_a, rpc_b, |x| x.weighted_peak_latency());
|
||||
trace!("winner: {}", faster_rpc);
|
||||
|
||||
// add to the skip list in case this one fails
|
||||
|
```diff
@@ -18,7 +18,6 @@ use latency::{EwmaLatency, PeakEwmaLatency, RollingQuantileLatency};
 use log::{debug, info, trace, warn, Level};
 use migration::sea_orm::DatabaseConnection;
 use nanorand::Rng;
-use ordered_float::OrderedFloat;
 use parking_lot::RwLock;
 use redis_rate_limiter::{RedisPool, RedisRateLimitResult, RedisRateLimiter};
 use serde::ser::{SerializeStruct, Serializer};
```
```diff
@@ -64,7 +63,7 @@ pub struct Web3Rpc {
     pub(super) head_block: Option<watch::Sender<Option<Web3ProxyBlock>>>,
     /// Track head block latency.
     /// RwLock is fine because this isn't updated often and is for monitoring. It is not used on the hot path.
-    pub(super) head_latency_ms: RwLock<EwmaLatency>,
+    pub(super) head_delay: RwLock<EwmaLatency>,
     /// Track peak request latency
     /// peak_latency is only inside an Option so that the "Default" derive works. it will always be set.
     pub(super) peak_latency: Option<PeakEwmaLatency>,
```
```diff
@@ -76,7 +75,7 @@ pub struct Web3Rpc {
     pub(super) external_requests: AtomicUsize,
     /// Track time used by external requests served
     /// request_ms_histogram is only inside an Option so that the "Default" derive works. it will always be set.
-    pub(super) request_latency: Option<RollingQuantileLatency>,
+    pub(super) median_latency: Option<RollingQuantileLatency>,
     /// Track in-flight requests
     pub(super) active_requests: AtomicUsize,
     /// disconnect_watch is only inside an Option so that the "Default" derive works. it will always be set.
```
```diff
@@ -172,7 +171,7 @@ impl Web3Rpc {
             Duration::from_secs(1),
         );
 
-        let request_latency = RollingQuantileLatency::spawn_median(1_000).await;
+        let median_request_latency = RollingQuantileLatency::spawn_median(1_000).await;
 
         let http_provider = if let Some(http_url) = config.http_url {
             let http_url = http_url.parse::<Url>()?;
```
```diff
@@ -208,7 +207,7 @@ impl Web3Rpc {
             http_provider,
             name,
             peak_latency: Some(peak_latency),
-            request_latency: Some(request_latency),
+            median_latency: Some(median_request_latency),
             soft_limit: config.soft_limit,
             ws_url,
             disconnect_watch: Some(disconnect_watch),
```
```diff
@@ -266,19 +265,19 @@ impl Web3Rpc {
     pub fn sort_for_load_balancing_on(
         &self,
         max_block: Option<U64>,
-    ) -> ((bool, u32, Reverse<U64>), OrderedFloat<f64>) {
+    ) -> ((bool, u32, Reverse<U64>), Duration) {
         let sort_on = self.sort_on(max_block);
 
-        let weighted_peak_ewma_seconds = self.weighted_peak_ewma_seconds();
+        let weighted_peak_latency = self.weighted_peak_latency();
 
-        let x = (sort_on, weighted_peak_ewma_seconds);
+        let x = (sort_on, weighted_peak_latency);
 
         trace!("sort_for_load_balancing {}: {:?}", self, x);
 
         x
     }
 
-    /// like sort_for_load_balancing, but shuffles tiers randomly instead of sorting by weighted_peak_ewma_seconds
+    /// like sort_for_load_balancing, but shuffles tiers randomly instead of sorting by weighted_peak_latency
     pub fn shuffle_for_load_balancing_on(
         &self,
         max_block: Option<U64>,
```
```diff
@@ -292,17 +291,17 @@ impl Web3Rpc {
         (sort_on, r)
     }
 
-    pub fn weighted_peak_ewma_seconds(&self) -> OrderedFloat<f64> {
+    pub fn weighted_peak_latency(&self) -> Duration {
         let peak_latency = if let Some(peak_latency) = self.peak_latency.as_ref() {
-            peak_latency.latency().as_secs_f64()
+            peak_latency.latency()
         } else {
-            1.0
+            Duration::from_secs(1)
         };
 
         // TODO: what ordering?
-        let active_requests = self.active_requests.load(atomic::Ordering::Acquire) as f64 + 1.0;
+        let active_requests = self.active_requests.load(atomic::Ordering::Acquire) as f32 + 1.0;
 
-        OrderedFloat(peak_latency * active_requests)
+        peak_latency.mul_f32(active_requests)
     }
 
     // TODO: would be great if rpcs exposed this. see https://github.com/ledgerwatch/erigon/issues/6391
```
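Why `Duration` can replace `OrderedFloat<f64>` here: `Duration` already implements `Ord`, so it works directly as a `min_by_key` key, and `mul_f32` applies the same active-request load factor without leaving the time domain. A small self-contained illustration with made-up numbers:

```rust
use std::cmp::min_by_key;
use std::time::Duration;

fn weighted(peak_latency: Duration, active_requests: usize) -> Duration {
    // idle server: factor 1.0; each in-flight request adds 1.0,
    // so busier servers look proportionally slower
    peak_latency.mul_f32(active_requests as f32 + 1.0)
}

fn main() {
    // rpc_a is slower per request but idle; rpc_b is fast but busy
    let rpc_a = ("a", weighted(Duration::from_millis(80), 0)); // 80ms * 1 = 80ms
    let rpc_b = ("b", weighted(Duration::from_millis(30), 3)); // 30ms * 4 = 120ms

    // Duration is Ord, so no OrderedFloat wrapper is needed
    let faster = min_by_key(rpc_a, rpc_b, |x| x.1);
    assert_eq!(faster.0, "a");
}
```

The `+ 1.0` keeps an idle server's weight equal to its raw peak latency instead of collapsing it to zero.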
```diff
@@ -697,7 +696,7 @@ impl Web3Rpc {
         let rpc = self.clone();
 
         // TODO: how often? different depending on the chain?
-        // TODO: reset this timeout when a new block is seen? we need to keep request_latency updated though
+        // TODO: reset this timeout when a new block is seen? we need to keep median_request_latency updated though
         let health_sleep_seconds = 5;
 
         // health check loop
```
```diff
@@ -1173,29 +1172,30 @@ impl Serialize for Web3Rpc {
             &self.active_requests.load(atomic::Ordering::Relaxed),
         )?;
 
-        state.serialize_field(
-            "head_latency_ms",
-            &self.head_latency_ms.read().duration().as_millis(),
-        )?;
-
-        state.serialize_field(
-            "request_latency_ms",
-            &self
-                .request_latency
-                .as_ref()
-                .unwrap()
-                .duration()
-                .as_millis(),
-        )?;
-
-        state.serialize_field(
-            "peak_latency_ms",
-            &self.peak_latency.as_ref().unwrap().latency().as_millis(),
-        )?;
-
-        {
-            let weighted_latency_ms = self.weighted_peak_ewma_seconds() * 1000.0;
-            state.serialize_field("weighted_latency_ms", weighted_latency_ms.as_ref())?;
-        }
+        {
+            let head_delay_ms = self.head_delay.read().latency().as_secs_f32() * 1000.0;
+            state.serialize_field("head_delay_ms", &(head_delay_ms))?;
+        }
+
+        {
+            let median_latency_ms = self
+                .median_latency
+                .as_ref()
+                .unwrap()
+                .latency()
+                .as_secs_f32()
+                * 1000.0;
+            state.serialize_field("median_latency_ms", &(median_latency_ms))?;
+        }
+
+        {
+            let peak_latency_ms =
+                self.peak_latency.as_ref().unwrap().latency().as_secs_f32() * 1000.0;
+            state.serialize_field("peak_latency_ms", &peak_latency_ms)?;
+        }
+        {
+            let weighted_latency_ms = self.weighted_peak_latency().as_secs_f32() * 1000.0;
+            state.serialize_field("weighted_latency_ms", &weighted_latency_ms)?;
+        }
 
         state.end()
```
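A note on the serializer change: the old code used `Duration::as_millis()`, which returns an integer and truncates sub-millisecond detail, while the new `as_secs_f32() * 1000.0` keeps fractional milliseconds, which matters for latencies well under 1 ms. A tiny standalone illustration:

```rust
use std::time::Duration;

fn main() {
    let latency = Duration::from_micros(750); // 0.75 ms

    let truncated = latency.as_millis();             // 0 (u128, sub-ms lost)
    let fractional = latency.as_secs_f32() * 1000.0; // 0.75

    println!("as_millis: {truncated}, f32 ms: {fractional}");
}
```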
```diff
@@ -383,7 +383,7 @@ impl OpenRequestHandle {
 
         self.rpc.peak_latency.as_ref().unwrap().report(latency);
         self.rpc
-            .request_latency
+            .median_latency
             .as_ref()
             .unwrap()
             .record(latency)
```