use crate::app::Web3ProxyJoinHandle;
use crate::compute_units::default_usd_per_cu;
use crate::rpcs::blockchain::{BlocksByHashCache, Web3ProxyBlock};
use crate::rpcs::one::Web3Rpc;
use argh::FromArgs;
use ethers::prelude::{Address, TxHash};
use ethers::types::{U256, U64};
use hashbrown::HashMap;
use migration::sea_orm::prelude::Decimal;
use sentry::types::Dsn;
use serde::{de, Deserialize, Deserializer};
use serde_inline_default::serde_inline_default;
use std::fmt;
use std::sync::atomic::AtomicU64;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::mpsc;
use tracing::warn;
pub type BlockAndRpc = (Option<Web3ProxyBlock>, Arc<Web3Rpc>);
pub type TxHashAndRpc = (TxHash, Arc<Web3Rpc>);
#[derive(Debug, FromArgs)]
/// Web3_proxy is a fast caching and load balancing proxy for web3 (Ethereum or similar) JsonRPC servers.
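///
/// A sketch of an invocation; `argh` derives the flag names from the field
/// names below (underscores become hyphens):
///
/// ```text
/// web3_proxy --config ./config/development.toml --port 8544 --workers 8
/// ```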
pub struct CliConfig {
    /// path to a toml of rpc servers
    #[argh(option, default = "\"./config/development.toml\".to_string()")]
    pub config: String,
    /// what port the proxy should listen on
    #[argh(option, default = "8544")]
    pub port: u16,
    /// what port the proxy should expose prometheus stats on
    #[argh(option, default = "8543")]
    pub prometheus_port: u16,
    /// number of worker threads. Defaults to the number of logical processors
    #[argh(option, default = "0")]
    pub workers: usize,
    /// path to a binary file used to encrypt cookies. Should be at least 64 bytes.
    #[argh(option, default = "\"./data/development_cookie_key\".to_string()")]
    pub cookie_key_filename: String,
}
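
/// The top-level TOML config. A minimal sketch of the expected shape (table
/// and key names map 1:1 to the fields below; values are illustrative):
///
/// ```toml
/// [app]
/// chain_id = 1
///
/// [balanced_rpcs.local]
/// http_url = "http://127.0.0.1:8545"
/// ```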
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
pub struct TopConfig {
    pub app: AppConfig,
    pub balanced_rpcs: HashMap<String, Web3RpcConfig>,
    #[serde(default = "Default::default")]
    pub private_rpcs: HashMap<String, Web3RpcConfig>,
    #[serde(default = "Default::default")]
    pub bundler_4337_rpcs: HashMap<String, Web3RpcConfig>,
    /// unknown config options get put here
    #[serde(flatten, default = "HashMap::default")]
    pub extra: HashMap<String, serde_json::Value>,
}
impl TopConfig {
    /// TODO: this should probably be part of Deserialize
    pub fn clean(&mut self) {
        if !self.extra.is_empty() {
            warn!(
                extra=?self.extra.keys(),
                "unknown TopConfig fields!",
            );
        }

        self.app.clean();
    }
}
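
// A sketch of the intended load path, assuming a TOML deserializer such as
// the `toml` crate (the actual loading happens outside this module):
//
//     let mut top: TopConfig = toml::from_str(&std::fs::read_to_string(&cli.config)?)?;
//     top.clean();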
/// shared configuration between Web3Rpcs
// TODO: no String, only &str
#[serde_inline_default]
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
pub struct AppConfig {
    /// Request limit for allowed origins for anonymous users.
    /// These requests get rate limited by IP.
    #[serde(default = "Default::default")]
    pub allowed_origin_requests_per_period: HashMap<String, u64>,
    /// erigon defaults to pruning beyond 90,000 blocks
    #[serde_inline_default(90_000u64)]
    pub archive_depth: u64,
    /// pool of extra connections allowed for authenticated users
    #[serde_inline_default(0usize)]
    pub bonus_premium_concurrency: usize,
    /// pool of extra connections allowed for anonymous users
    #[serde_inline_default(0usize)]
    pub bonus_public_concurrency: usize,
    /// pool of extra requests per second allowed for anonymous users
    #[serde_inline_default(0u64)]
    pub bonus_frontend_public_rate_limit: u64,
    /// pool of extra requests per second allowed for authenticated users
    #[serde_inline_default(0u64)]
    pub bonus_frontend_premium_rate_limit: u64,
    /// EVM chain id. 1 for ETH
    /// TODO: better type for chain_id? max of `u64::MAX / 2 - 36` <https://github.com/ethereum/EIPs/issues/2294>
    #[serde_inline_default(1u64)]
    pub chain_id: u64,
    // Cost per computational unit
    // pub cost_per_cu: Decimal,
    /// Database is used for user data.
    /// Currently supports mysql or a compatible backend.
    pub db_url: Option<String>,
    /// minimum size of the connection pool for the database.
    /// If none, the number of workers is used.
    pub db_min_connections: Option<u32>,
    /// maximum size of the connection pool for the database.
    /// If none, the minimum * 2 is used.
    pub db_max_connections: Option<u32>,
    /// Read-only replica of db_url.
    pub db_replica_url: Option<String>,
    /// minimum size of the connection pool for the database replica.
    /// If none, db_min_connections is used.
    pub db_replica_min_connections: Option<u32>,
    /// maximum size of the connection pool for the database replica.
    /// If none, db_max_connections is used.
    pub db_replica_max_connections: Option<u32>,
    /// Default request limit for registered users.
    /// 0 = block all requests
    /// None = allow all requests
    pub default_user_max_requests_per_period: Option<u64>,
    /// Default ERC address for our deposit contract
    pub deposit_factory_contract: Option<Address>,
    /// True if anonymous users should be able to eth_subscribe
    /// newHeads is always allowed because that is cheap to send
    #[serde_inline_default(false)]
    pub free_subscriptions: bool,
    /// minimum amount to increase eth_estimateGas results
    pub gas_increase_min: Option<U256>,
    /// percentage to increase eth_estimateGas results. 100 == 100%
    pub gas_increase_percent: Option<U256>,
    /// Restrict user registration.
    /// None = no code needed
    pub invite_code: Option<String>,
    /// Optional kafka brokers
    /// Used by /debug/:rpc_key urls for logging requests and responses. No other endpoints log request/response data.
    pub kafka_urls: Option<String>,
    #[serde_inline_default("ssl".to_string())]
    pub kafka_protocol: String,
    /// domain in sign-in-with-ethereum messages
    pub login_domain: Option<String>,
    /// do not serve any requests if our best known block is more than this many blocks behind the network's best known block.
    pub max_head_block_lag: Option<U64>,
    /// Rate limit for the login entrypoint.
    /// This is separate from the rpc limits.
    #[serde_inline_default(10u64)]
    pub login_rate_limit_per_period: u64,
    /// The soft limit prevents thundering herds as new blocks are seen.
    #[serde_inline_default(1u32)]
    pub min_sum_soft_limit: u32,
    /// Another knob for preventing thundering herds as new blocks are seen.
    #[serde_inline_default(1usize)]
    pub min_synced_rpcs: usize,
    /// Concurrent request limit for anonymous users.
    /// Some(0) = block all requests
    /// None = allow all requests
    pub public_max_concurrent_requests: Option<usize>,
    /// Request limit for anonymous users.
    /// Some(0) = block all requests
    /// None = allow all requests
    pub public_requests_per_period: Option<u64>,
    /// Salt for hashing recent IPs. Not a perfect way to introduce privacy, but better than nothing
    pub public_recent_ips_salt: Option<String>,
    /// RPC responses are cached locally
    #[serde_inline_default(10u64.pow(8))]
    pub response_cache_max_bytes: u64,
    /// the stats page url for an anonymous user.
    pub redirect_public_url: Option<String>,
    /// the stats page url for a logged in user. if set, must contain "{rpc_key_id}"
    pub redirect_rpc_key_url: Option<String>,
    /// Optionally send errors to <https://sentry.io>
    pub sentry_url: Option<Dsn>,
    /// Stripe api key for checking validity of webhooks
    pub stripe_whsec_key: Option<String>,
    pub usd_per_cu: Option<Decimal>,
    /// Track rate limits in redis (or a compatible backend)
    /// It is okay if this data is lost.
    pub volatile_redis_url: Option<String>,
    /// maximum size of the connection pool for the cache
    /// If none, workers * 2 is used
    pub volatile_redis_max_connections: Option<usize>,
    /// influxdb host for stats
    pub influxdb_host: Option<String>,
    /// influxdb org for stats
    pub influxdb_org: Option<String>,
    /// influxdb token for stats
    pub influxdb_token: Option<String>,
    /// influxdb bucket to use for stats
    pub influxdb_bucket: Option<String>,
    /// unique_id keeps stats from different servers from being seen as duplicates of each other.
    /// this int is used as part of the "nanoseconds" part of the influx timestamp.
    /// it can also be used by the rate limiter.
    ///
    /// This **MUST** be set to a unique value for each running server.
    /// If not set, servers will overwrite each other's stats!
    ///
    /// <https://docs.influxdata.com/influxdb/v2.0/write-data/best-practices/duplicate-points/#increment-the-timestamp>
    #[serde_inline_default(0i64)]
    pub unique_id: i64,
    /// unknown config options get put here
    #[serde(flatten, default = "HashMap::default")]
    pub extra: HashMap<String, serde_json::Value>,
}
impl Default for AppConfig {
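    // deserializing an empty object picks up every `serde_inline_default`
    // value, so this `Default` cannot drift out of sync with serde's defaults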
    fn default() -> Self {
        serde_json::from_str("{}").unwrap()
    }
}
impl AppConfig {
    /// TODO: this should probably be part of Deserialize
    fn clean(&mut self) {
        if self.usd_per_cu.is_none() {
            self.usd_per_cu = Some(default_usd_per_cu(self.chain_id));
        }

        if let Some(influxdb_id) = self.extra.get("influxdb_id") {
            self.unique_id = influxdb_id.as_i64().unwrap();
        }

        if !self.extra.is_empty() {
            warn!(
                extra=?self.extra.keys(),
                "unknown Web3ProxyAppConfig fields!",
            );
        }
    }
}
/// TODO: we can't query a provider because we need this to create a provider
/// TODO: cache this
pub fn average_block_interval(chain_id: u64) -> Duration {
    match chain_id {
        // ethereum
        1 => Duration::from_secs(12),
        // ethereum-goerli
        5 => Duration::from_secs(12),
        // optimism
        10 => Duration::from_secs(2),
        // binance
        56 => Duration::from_secs(3),
        // polygon
        137 => Duration::from_secs(2),
        // fantom
        250 => Duration::from_secs(1),
        // zkevm polygon
        1101 => Duration::from_secs(7),
        // base
        8453 => Duration::from_secs(2),
        // arbitrum
        42161 => Duration::from_millis(500),
        // web3-proxy tests
        999_001_999 => Duration::from_secs(10),
        // anything else
        _ => {
            let default = 10;
            warn!(
                "unknown chain_id ({}). defaulting average_block_interval to {} seconds",
                chain_id, default
            );
            Duration::from_secs(default)
        }
    }
}
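
/// How much block data a backend rpc can serve.
///
/// In a config file this deserializes from either a string or a non-negative
/// integer (see the hand-written `Deserialize` impl below):
///
/// ```toml
/// block_data_limit = "archive"
/// # or a fixed depth:
/// # block_data_limit = 90_000
/// ```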
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum BlockDataLimit {
    /// archive nodes can return all data
    Archive,
    /// pruned nodes don't have all the data
    /// some devs will argue about what "prune" means, but we use it to mean that any of the data is gone.
    /// TODO: this is too simple. erigon can prune the different types of data differently
    Set(u64),
    /// Automatically detect the limit
    #[default]
    Unknown,
}
impl<'de> Deserialize<'de> for BlockDataLimit {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        struct BlockDataLimitVisitor;

        impl<'de> de::Visitor<'de> for BlockDataLimitVisitor {
            type Value = BlockDataLimit;

            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
                formatter.write_str("the string 'archive' or 'unknown', or a non-negative 64-bit integer. 0 means automatically detect")
            }

            fn visit_str<E: de::Error>(self, value: &str) -> Result<Self::Value, E> {
                match value.to_ascii_lowercase().as_str() {
                    "archive" => Ok(BlockDataLimit::Archive),
                    "unknown" => Ok(BlockDataLimit::Unknown),
                    _ => Err(de::Error::custom(format!("Unexpected value {}", value))),
                }
            }

            fn visit_i64<E: de::Error>(self, v: i64) -> Result<Self::Value, E> {
                if v < 0 {
                    Err(de::Error::custom("Negative values are not allowed"))
                } else {
                    Ok(BlockDataLimit::Set(v as u64))
                }
            }

            // self-describing formats like JSON hand non-negative integers to
            // visit_u64 rather than visit_i64 (TOML integers arrive as i64)
            fn visit_u64<E: de::Error>(self, v: u64) -> Result<Self::Value, E> {
                Ok(BlockDataLimit::Set(v))
            }
        }

        deserializer.deserialize_any(BlockDataLimitVisitor)
    }
}
impl From<BlockDataLimit> for AtomicU64 {
    fn from(value: BlockDataLimit) -> Self {
        match value {
            BlockDataLimit::Archive => AtomicU64::new(u64::MAX),
            BlockDataLimit::Set(limit) => AtomicU64::new(limit),
            BlockDataLimit::Unknown => AtomicU64::new(0),
        }
    }
}
/// Configuration for a backend web3 RPC server
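///
/// A sketch of one server entry as it might appear in the TOML config
/// (section name and values are illustrative):
///
/// ```toml
/// [balanced_rpcs.local_erigon]
/// display_name = "local erigon"
/// ws_url = "ws://127.0.0.1:8546"
/// http_url = "http://127.0.0.1:8545"
/// soft_limit = 100_000
/// ```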
#[serde_inline_default]
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
pub struct Web3RpcConfig {
    /// simple way to disable a connection without deleting the row
    #[serde(default = "Default::default")]
    pub disabled: bool,
    /// a name used in /status and other user facing messages
    pub display_name: Option<String>,
    /// while not absolutely required, a ws:// or wss:// connection will be able to subscribe to head blocks
    pub ws_url: Option<String>,
    /// while not absolutely required, an http:// or https:// connection will allow erigon to stream JSON
    pub http_url: Option<String>,
    /// block data limit. If `Unknown`, the limit will be detected automatically
    #[serde(default = "Default::default")]
    pub block_data_limit: BlockDataLimit,
    /// the requests per second at which the server starts slowing down
    #[serde_inline_default(1u32)]
    pub soft_limit: u32,
    /// the requests per period at which the server throws errors (rate limit or otherwise)
    pub hard_limit: Option<u64>,
    /// the number of seconds in a rate limiting period
    /// some providers allow burst limits and rolling windows, but coding that is a lot more complicated
    #[serde_inline_default(1u32)]
    pub hard_limit_period: u32,
    /// whether hard limits are applied per server or per endpoint. default is per server
    #[serde(default = "Default::default")]
    pub hard_limit_per_endpoint: bool,
    /// only use this rpc if everything else is lagging too far. this allows us to ignore fast but very low limit rpcs
    #[serde(default = "Default::default")]
    pub backup: bool,
    /// Subscribe to the firehose of pending transactions
    /// Don't do this with free rpcs
    #[serde(default = "Default::default")]
    pub subscribe_txs: bool,
    /// unknown config options get put here
    #[serde(flatten, default = "HashMap::default")]
    pub extra: HashMap<String, serde_json::Value>,
}
impl Default for Web3RpcConfig {
    fn default() -> Self {
        serde_json::from_str("{}").unwrap()
    }
}
impl Web3RpcConfig {
    /// Create a Web3Rpc from config
    /// TODO: move this into Web3Rpc? (just need to make things pub(crate))
    #[allow(clippy::too_many_arguments)]
    pub async fn spawn(
        self,
        name: String,
        redis_pool: Option<redis_rate_limiter::RedisPool>,
        server_id: i64,
        chain_id: u64,
        block_interval: Duration,
        http_client: Option<reqwest::Client>,
        blocks_by_hash_cache: BlocksByHashCache,
        block_and_rpc_sender: Option<mpsc::UnboundedSender<BlockAndRpc>>,
        pending_txid_firehose_sender: Option<mpsc::Sender<TxHash>>,
        max_head_block_age: Duration,
    ) -> anyhow::Result<(Arc<Web3Rpc>, Web3ProxyJoinHandle<()>)> {
        if !self.extra.is_empty() {
            // TODO: move this to a `clean` function
            warn!(extra=?self.extra.keys(), "unknown Web3RpcConfig fields!");
        }

        Web3Rpc::spawn(
            self,
            name,
            chain_id,
            http_client,
            redis_pool,
            server_id,
            block_interval,
            blocks_by_hash_cache,
            block_and_rpc_sender,
            pending_txid_firehose_sender,
            max_head_block_age,
        )
        .await
    }
}
#[cfg(test)]
mod tests {
    use super::{AppConfig, Web3RpcConfig};
    use serde_json::json;

    #[test]
    fn expected_app_defaults() {
        // a is from serde
        let a: AppConfig = serde_json::from_value(json!({
            "chain_id": 1,
        }))
        .unwrap();
        assert_eq!(a.min_synced_rpcs, 1);

        // b is from Default
        let b = AppConfig::default();
        assert_eq!(b.min_synced_rpcs, 1);

        assert_eq!(a, b);
    }

    #[test]
    fn expected_rpc_defaults() {
        let a: Web3RpcConfig = serde_json::from_str("{}").unwrap();
        assert_eq!(a.soft_limit, 1);

        let b: Web3RpcConfig = Default::default();
        assert_eq!(b.soft_limit, 1);

        assert_eq!(a, b);
    }
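
    // A few extra sketches exercising the helpers above. These only assume
    // what this file already defines (plus serde_json, which the existing
    // tests already use).
    #[test]
    fn block_data_limit_from_string_or_int() {
        use super::BlockDataLimit;

        // the custom Deserialize accepts "archive"/"unknown" strings...
        let archive: BlockDataLimit = serde_json::from_value(json!("archive")).unwrap();
        assert_eq!(archive, BlockDataLimit::Archive);

        let unknown: BlockDataLimit = serde_json::from_value(json!("unknown")).unwrap();
        assert_eq!(unknown, BlockDataLimit::Unknown);

        // ...or a non-negative integer
        let set: BlockDataLimit = serde_json::from_value(json!(90_000)).unwrap();
        assert_eq!(set, BlockDataLimit::Set(90_000));

        // negative limits are rejected
        assert!(serde_json::from_value::<BlockDataLimit>(json!(-1)).is_err());
    }

    #[test]
    fn block_interval_known_and_unknown_chains() {
        use super::average_block_interval;
        use std::time::Duration;

        // values mirror the match arms in average_block_interval
        assert_eq!(average_block_interval(1), Duration::from_secs(12));
        assert_eq!(average_block_interval(42161), Duration::from_millis(500));

        // unknown chains fall back to 10 seconds (and log a warning)
        assert_eq!(average_block_interval(u64::MAX), Duration::from_secs(10));
    }

    #[test]
    fn clean_copies_influxdb_id_to_unique_id() {
        // "influxdb_id" is not a struct field, so serde routes it into `extra`;
        // AppConfig::clean() then copies it into `unique_id`
        let mut a: AppConfig = serde_json::from_value(json!({
            "chain_id": 1,
            "influxdb_id": 42,
        }))
        .unwrap();
        assert_eq!(a.unique_id, 0);

        a.clean();
        assert_eq!(a.unique_id, 42);
    }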
}