From eb4d05a520658695dc04c0bb70e6b21545ac0b79 Mon Sep 17 00:00:00 2001 From: Bryan Stitt Date: Wed, 25 Jan 2023 21:24:09 -0800 Subject: [PATCH] stats v2 rebased all my commits and squashed them down to one --- .cargo/config.toml | 1 + .vscode/settings.json | 4 +- Cargo.lock | 556 +++++++++++++---- Dockerfile | 5 +- TODO.md | 81 ++- config/example.toml | 5 + entities/src/login.rs | 2 +- entities/src/mod.rs | 3 +- entities/src/pending_login.rs | 2 +- entities/src/prelude.rs | 3 +- entities/src/revert_log.rs | 2 +- entities/src/rpc_accounting.rs | 2 +- entities/src/rpc_accounting_v2.rs | 47 ++ entities/src/rpc_key.rs | 15 +- entities/src/sea_orm_active_enums.rs | 7 +- entities/src/secondary_user.rs | 2 +- entities/src/user.rs | 2 +- entities/src/user_tier.rs | 2 +- migration/src/lib.rs | 2 + migration/src/m20230125_204810_stats_v2.rs | 157 +++++ redis-rate-limiter/Cargo.toml | 1 + redis-rate-limiter/src/lib.rs | 6 +- web3_proxy/Cargo.toml | 6 +- web3_proxy/src/admin_queries.rs | 2 +- web3_proxy/src/app/mod.rs | 157 +++-- web3_proxy/src/app/ws.rs | 29 +- web3_proxy/src/app_stats.rs | 416 ------------- web3_proxy/src/bin/web3_proxy_cli/main.rs | 3 + web3_proxy/src/bin/web3_proxy_cli/proxyd.rs | 228 +++---- .../src/bin/web3_proxy_cli/user_export.rs | 3 +- web3_proxy/src/config.rs | 11 +- web3_proxy/src/frontend/authorization.rs | 15 +- web3_proxy/src/frontend/mod.rs | 33 +- web3_proxy/src/frontend/rpc_proxy_ws.rs | 8 +- web3_proxy/src/frontend/status.rs | 4 +- web3_proxy/src/frontend/users.rs | 24 +- web3_proxy/src/http_params.rs | 206 ++++++ web3_proxy/src/jsonrpc.rs | 3 +- web3_proxy/src/lib.rs | 6 +- web3_proxy/src/metered/jsonrpc_error_count.rs | 54 -- web3_proxy/src/metered/mod.rs | 5 - .../src/metered/provider_error_count.rs | 51 -- web3_proxy/src/pagerduty.rs | 10 +- .../{metrics_frontend.rs => prometheus.rs} | 33 +- web3_proxy/src/rpcs/blockchain.rs | 53 +- web3_proxy/src/rpcs/consensus.rs | 21 +- web3_proxy/src/rpcs/many.rs | 98 ++- web3_proxy/src/rpcs/one.rs | 73 ++- web3_proxy/src/rpcs/request.rs | 215 ++++--- .../{user_queries.rs => stats/db_queries.rs} | 420 ++++++------- web3_proxy/src/stats/influxdb_queries.rs | 41 ++ web3_proxy/src/stats/mod.rs | 584 ++++++++++++++++++ 52 files changed, 2409 insertions(+), 1310 deletions(-) create mode 100644 entities/src/rpc_accounting_v2.rs create mode 100644 migration/src/m20230125_204810_stats_v2.rs delete mode 100644 web3_proxy/src/app_stats.rs create mode 100644 web3_proxy/src/http_params.rs delete mode 100644 web3_proxy/src/metered/jsonrpc_error_count.rs delete mode 100644 web3_proxy/src/metered/mod.rs delete mode 100644 web3_proxy/src/metered/provider_error_count.rs rename web3_proxy/src/{metrics_frontend.rs => prometheus.rs} (50%) rename web3_proxy/src/{user_queries.rs => stats/db_queries.rs} (55%) create mode 100644 web3_proxy/src/stats/influxdb_queries.rs create mode 100644 web3_proxy/src/stats/mod.rs diff --git a/.cargo/config.toml b/.cargo/config.toml index f4ad2dbf..1ebaa03c 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,6 +1,7 @@ [build] rustflags = [ # potentially faster. 
https://nnethercote.github.io/perf-book/build-configuration.html + # TODO: we might want to disable this so its easier to run the proxy across different aws instance types "-C", "target-cpu=native", # tokio unstable is needed for tokio-console "--cfg", "tokio_unstable" diff --git a/.vscode/settings.json b/.vscode/settings.json index f5ab95b6..9e26dfee 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1 @@ -{ - "rust-analyzer.cargo.features": "all" -} \ No newline at end of file +{} \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 2e67ec28..8e0b16ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -96,6 +96,15 @@ dependencies = [ "libc", ] +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + [[package]] name = "anyhow" version = "1.0.69" @@ -133,6 +142,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64cb94155d965e3d37ffbbe7cc5b82c3dd79dd33bd48e536f73d2cfb8d85506f" +[[package]] +name = "arrayvec" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23b62fc65de8e4e7f52534fb52b0f3ed04746ae267519eef2a83941e8085068b" + [[package]] name = "arrayvec" version = "0.7.2" @@ -180,19 +195,20 @@ dependencies = [ [[package]] name = "async-stream" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" +checksum = "ad445822218ce64be7a341abfb0b1ea43b5c23aa83902542a4542e78309d8e5e" dependencies = [ "async-stream-impl", "futures-core", + "pin-project-lite", ] [[package]] name = "async-stream-impl" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" +checksum = "e4655ae1a7b0cdf149156f780c5bf3f1352bc53cbd9e0a361a7ef7b22947e965" dependencies = [ "proc-macro2", "quote", @@ -296,7 +312,7 @@ dependencies = [ "http", "http-body", "hyper", - "itoa 1.0.5", + "itoa", "matchit", "memchr", "mime", @@ -351,7 +367,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fbf955307ff8addb48d2399393c9e2740dd491537ec562b66ab364fc4a38841" dependencies = [ - "heck 0.4.0", + "heck 0.4.1", "proc-macro2", "quote", "syn", @@ -488,16 +504,28 @@ dependencies = [ "radium 0.3.0", ] +[[package]] +name = "bitvec" +version = "0.19.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55f93d0ef3363c364d5976646a38f04cf67cfe1d4c8d160cdea02cab2c116b33" +dependencies = [ + "funty 1.1.0", + "radium 0.5.3", + "tap", + "wyz 0.2.0", +] + [[package]] name = "bitvec" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" dependencies = [ - "funty", + "funty 2.0.0", "radium 0.7.0", "tap", - "wyz", + "wyz 0.5.1", ] [[package]] @@ -550,19 +578,19 @@ dependencies = [ [[package]] name = "borsh" -version = "0.9.3" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15bf3650200d8bffa99015595e10f1fbd17de07abbc25bb067da79e769939bfa" +checksum = "40f9ca3698b2e4cb7c15571db0abc5551dca417a21ae8140460b50309bb2cc62" dependencies = [ "borsh-derive", - "hashbrown 0.11.2", + 
"hashbrown 0.13.2", ] [[package]] name = "borsh-derive" -version = "0.9.3" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6441c552f230375d18e3cc377677914d2ca2b0d36e52129fe15450a2dce46775" +checksum = "598b3eacc6db9c3ee57b22707ad8f6a8d2f6d442bfe24ffeb8cbb70ca59e6a35" dependencies = [ "borsh-derive-internal", "borsh-schema-derive-internal", @@ -573,9 +601,9 @@ dependencies = [ [[package]] name = "borsh-derive-internal" -version = "0.9.3" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5449c28a7b352f2d1e592a8a28bf139bc71afb0764a14f3c02500935d8c44065" +checksum = "186b734fa1c9f6743e90c95d7233c9faab6360d1a96d4ffa19d9cfd1e9350f8a" dependencies = [ "proc-macro2", "quote", @@ -584,9 +612,9 @@ dependencies = [ [[package]] name = "borsh-schema-derive-internal" -version = "0.9.3" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdbd5696d8bfa21d53d9fe39a714a18538bad11492a42d066dbbc395fb1951c0" +checksum = "99b7ff1008316626f485991b960ade129253d4034014616b94f309a15366cc49" dependencies = [ "proc-macro2", "quote", @@ -599,18 +627,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "771fe0050b883fcc3ea2359b1a96bcfbc090b7116eae7c3c512c7a083fdf23d3" -[[package]] -name = "bstr" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - [[package]] name = "bumpalo" version = "3.12.0" @@ -725,9 +741,9 @@ dependencies = [ [[package]] name = "cargo_metadata" -version = "0.15.2" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "982a0cf6a99c350d7246035613882e376d58cebe571785abc5da4f648d53ac0a" +checksum = "08a1ec454bc3eead8719cb56e15dbbfecdbc14e4b3a3ae4936cc6e31f5fc0d07" dependencies = [ "camino", "cargo-platform", @@ -810,7 +826,7 @@ version = "3.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" dependencies = [ - "heck 0.4.0", + "heck 0.4.1", "proc-macro-error", "proc-macro2", "quote", @@ -1108,13 +1124,12 @@ dependencies = [ [[package]] name = "csv" -version = "1.1.6" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +checksum = "af91f40b7355f82b0a891f50e70399475945bb0b0da4f1700ce60761c9d3e359" dependencies = [ - "bstr", "csv-core", - "itoa 0.4.8", + "itoa", "ryu", "serde", ] @@ -1139,9 +1154,9 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.88" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322296e2f2e5af4270b54df9e85a02ff037e271af20ba3e7fe1575515dc840b8" +checksum = "86d3488e7665a7a483b57e25bdd90d0aeb2bc7608c8d0346acf2ad3f1caf1d62" dependencies = [ "cc", "cxxbridge-flags", @@ -1151,9 +1166,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.88" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "017a1385b05d631e7875b1f151c9f012d37b53491e2a87f65bff5c262b2111d8" +checksum = "48fcaf066a053a41a81dfb14d57d99738b767febb8b735c3016e469fac5da690" dependencies = [ "cc", "codespan-reporting", @@ -1166,21 +1181,31 @@ dependencies = [ [[package]] name = 
"cxxbridge-flags" -version = "1.0.88" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c26bbb078acf09bc1ecda02d4223f03bdd28bd4874edcb0379138efc499ce971" +checksum = "a2ef98b8b717a829ca5603af80e1f9e2e48013ab227b68ef37872ef84ee479bf" [[package]] name = "cxxbridge-macro" -version = "1.0.88" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357f40d1f06a24b60ae1fe122542c1fb05d28d32acb2aed064e84bc2ad1e252e" +checksum = "086c685979a698443656e5cf7856c95c642295a38599f12fb1ff76fb28d19892" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "dashmap" +version = "4.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c" +dependencies = [ + "cfg-if", + "num_cpus", +] + [[package]] name = "deadpool" version = "0.9.5" @@ -1339,6 +1364,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + [[package]] name = "dotenv" version = "0.15.0" @@ -1642,9 +1673,9 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ade3e9c97727343984e1ceada4fdab11142d2ee3472d2c67027d56b1251d4f15" dependencies = [ - "arrayvec", + "arrayvec 0.7.2", "bytes", - "cargo_metadata 0.15.2", + "cargo_metadata 0.15.3", "chrono", "convert_case 0.6.0", "elliptic-curve", @@ -1826,10 +1857,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" [[package]] -name = "fastrand" -version = "1.8.0" +name = "fallible-iterator" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" dependencies = [ "instant", ] @@ -1891,7 +1928,7 @@ dependencies = [ "futures-sink", "nanorand", "pin-project", - "spin 0.9.4", + "spin 0.9.5", ] [[package]] @@ -1929,6 +1966,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "funty" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" + [[package]] name = "funty" version = "2.0.0" @@ -2130,6 +2173,12 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "go-parse-duration" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "558b88954871f5e5b2af0e62e2e176c8bde7a6c2c4ed41b13d138d96da2e2cbd" + [[package]] name = "group" version = "0.12.1" @@ -2183,15 +2232,6 @@ dependencies = [ "byteorder", ] -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" -dependencies = [ - "ahash 0.7.6", -] - [[package]] name = "hashbrown" version = "0.12.3" @@ -2239,7 +2279,7 @@ dependencies = [ "byteorder", 
"crossbeam-channel", "flate2", - "nom", + "nom 7.1.3", "num-traits", ] @@ -2277,7 +2317,7 @@ dependencies = [ "atomic-polyfill", "hash32", "rustc_version", - "spin 0.9.4", + "spin 0.9.5", "stable_deref_trait", ] @@ -2292,9 +2332,9 @@ dependencies = [ [[package]] name = "heck" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" dependencies = [ "unicode-segmentation", ] @@ -2317,6 +2357,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + [[package]] name = "hex" version = "0.4.3" @@ -2360,7 +2406,7 @@ checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" dependencies = [ "bytes", "fnv", - "itoa 1.0.5", + "itoa", ] [[package]] @@ -2413,7 +2459,7 @@ dependencies = [ "http-body", "httparse", "httpdate", - "itoa 1.0.5", + "itoa", "pin-project-lite", "socket2", "tokio", @@ -2535,6 +2581,62 @@ dependencies = [ "regex", ] +[[package]] +name = "influxdb2" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9adea4aa306353d8cdc2920bf9206af2c37275fe51835ab61e06fa3c5fbf14e" +dependencies = [ + "base64 0.13.1", + "bytes", + "chrono", + "csv", + "dotenv", + "fallible-iterator", + "futures", + "go-parse-duration", + "influxdb2-derive", + "influxdb2-structmap", + "nom 6.1.2", + "opentelemetry", + "ordered-float", + "parking_lot 0.11.2", + "reqwest", + "serde", + "serde_json", + "serde_qs", + "smallvec", + "snafu", + "tempfile", + "tracing", + "tracing-subscriber 0.2.25", + "url", +] + +[[package]] +name = "influxdb2-derive" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1e007e3c8368af353f58831a0fdb1b6649df4a8f0a33aa6455fc69a896bbc30" +dependencies = [ + "itertools", + "proc-macro2", + "quote", + "regex", + "syn", +] + +[[package]] +name = "influxdb2-structmap" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1408e712051787357e99ff732e44e8833e79cea0fabc9361018abfbff72b6265" +dependencies = [ + "chrono", + "num-traits", + "ordered-float", +] + [[package]] name = "inout" version = "0.1.3" @@ -2558,12 +2660,22 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7d6c6f8c91b4b9ed43484ad1a938e393caf35960fce7f82a040497207bd8e9e" +checksum = "1abeb7a0dd0f8181267ff8adc397075586500b81b28a73e8a0208b00fc170fb3" dependencies = [ "libc", - "windows-sys 0.42.0", + "windows-sys 0.45.0", +] + +[[package]] +name = "io-uring" +version = "0.5.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41c85eff7f7c8d3ab8c7ec87313c0c194bbaf4371bb7d40f80293ba01bce8264" +dependencies = [ + "bitflags", + "libc", ] [[package]] @@ -2584,14 +2696,14 @@ dependencies = [ [[package]] name = "is-terminal" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28dfb6c8100ccc63462345b67d1bbc3679177c75ee4bf59bf29c8b1d110b8189" +checksum = "22e18b0a45d56fe973d6db23972bf5bc46f988a4a2385deac9cc29572f09daef" dependencies = [ - "hermit-abi 0.2.6", + "hermit-abi 0.3.1", "io-lifetimes", "rustix", - 
"windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -2603,12 +2715,6 @@ dependencies = [ "either", ] -[[package]] -name = "itoa" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" - [[package]] name = "itoa" version = "1.0.5" @@ -2696,6 +2802,19 @@ dependencies = [ "spin 0.5.2", ] +[[package]] +name = "lexical-core" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6607c62aa161d23d17a9072cc5da0be67cdfc89d3afb1e8d9c842bebc2525ffe" +dependencies = [ + "arrayvec 0.5.2", + "bitflags", + "cfg-if", + "ryu", + "static_assertions", +] + [[package]] name = "libc" version = "0.2.139" @@ -2760,6 +2879,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" +[[package]] +name = "matchers" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1" +dependencies = [ + "regex-automata", +] + [[package]] name = "matchers" version = "0.1.0" @@ -2880,6 +3008,19 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54" +[[package]] +name = "nom" +version = "6.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7413f999671bd4745a7b624bd370a569fb6bc574b23c83a3c5ed2e453f3d5e2" +dependencies = [ + "bitvec 0.19.6", + "funty 1.1.0", + "lexical-core", + "memchr", + "version_check", +] + [[package]] name = "nom" version = "7.1.3" @@ -3069,7 +3210,7 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "786393f80485445794f6043fd3138854dd109cc6c4bd1a6383db304c9ce9b9ce" dependencies = [ - "arrayvec", + "arrayvec 0.7.2", "auto_impl 1.0.1", "bytes", "ethereum-types", @@ -3088,6 +3229,26 @@ dependencies = [ "syn", ] +[[package]] +name = "opentelemetry" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91cea1dfd50064e52db033179952d18c770cbc5dfefc8eba45d619357ba3914" +dependencies = [ + "async-trait", + "dashmap", + "fnv", + "futures", + "js-sys", + "lazy_static", + "percent-encoding", + "pin-project", + "rand", + "thiserror", + "tokio", + "tokio-stream", +] + [[package]] name = "ordered-float" version = "3.4.0" @@ -3116,9 +3277,9 @@ checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" [[package]] name = "ouroboros" -version = "0.15.5" +version = "0.15.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfbb50b356159620db6ac971c6d5c9ab788c9cc38a6f49619fca2a27acb062ca" +checksum = "e1358bd1558bd2a083fed428ffeda486fbfb323e698cdda7794259d592ca72db" dependencies = [ "aliasable", "ouroboros_macro", @@ -3126,9 +3287,9 @@ dependencies = [ [[package]] name = "ouroboros_macro" -version = "0.15.5" +version = "0.15.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a0d9d1a6191c4f391f87219d1ea42b23f09ee84d64763cd05ee6ea88d9f384d" +checksum = "5f7d21ccd03305a674437ee1248f3ab5d4b1db095cf1caf49f1713ddf61956b7" dependencies = [ "Inflector", "proc-macro-error", @@ -3151,11 +3312,11 @@ dependencies = [ [[package]] name = "parity-scale-codec" -version = "3.2.2" +version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "e7ab01d0f889e957861bc65888d5ccbe82c158d0270136ba46820d43837cdf72" +checksum = "637935964ff85a605d114591d4d2c13c5d1ba2806dae97cea6bf180238a749ac" dependencies = [ - "arrayvec", + "arrayvec 0.7.2", "bitvec 1.0.1", "byte-slice-cast", "impl-trait-for-tuples", @@ -3284,9 +3445,9 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "pest" -version = "2.5.4" +version = "2.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ab62d2fa33726dbe6321cc97ef96d8cde531e3eeaf858a058de53a8a6d40d8f" +checksum = "028accff104c4e513bad663bbcd2ad7cfd5304144404c31ed0a77ac103d00660" dependencies = [ "thiserror", "ucd-trie", @@ -3294,9 +3455,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.5.4" +version = "2.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf026e2d0581559db66d837fe5242320f525d85c76283c61f4d51a1238d65ea" +checksum = "2ac3922aac69a40733080f53c1ce7f91dcf57e1a5f6c52f421fadec7fbdc4b69" dependencies = [ "pest", "pest_generator", @@ -3304,9 +3465,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.5.4" +version = "2.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b27bd18aa01d91c8ed2b61ea23406a676b42d82609c6e2581fba42f0c15f17f" +checksum = "d06646e185566b5961b4058dd107e0a7f56e77c3f484549fb119867773c0f202" dependencies = [ "pest", "pest_meta", @@ -3317,9 +3478,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.5.4" +version = "2.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f02b677c1859756359fc9983c2e56a0237f18624a3789528804406b7e915e5d" +checksum = "e6f60b2ba541577e2a0c307c8f39d1439108120eb7903adeb6497fa880c59616" dependencies = [ "once_cell", "pest", @@ -3536,7 +3697,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "66618389e4ec1c7afe67d51a9bf34ff9236480f8d51e7489b7d5ab0303c13f34" dependencies = [ "once_cell", - "toml_edit 0.18.0", + "toml_edit 0.18.1", ] [[package]] @@ -3635,6 +3796,12 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "def50a86306165861203e7f84ecffbbdfdea79f0e51039b33de1e952358c47ac" +[[package]] +name = "radium" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" + [[package]] name = "radium" version = "0.7.0" @@ -3750,7 +3917,7 @@ dependencies = [ "bytes", "combine", "futures-util", - "itoa 1.0.5", + "itoa", "percent-encoding", "pin-project-lite", "ryu", @@ -3764,6 +3931,7 @@ name = "redis-rate-limiter" version = "0.2.0" dependencies = [ "anyhow", + "chrono", "deadpool-redis", "tokio", ] @@ -3825,9 +3993,9 @@ dependencies = [ [[package]] name = "rend" -version = "0.3.6" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79af64b4b6362ffba04eef3a4e10829718a4896dac19daa741851c86781edf95" +checksum = "581008d2099240d37fb08d77ad713bcaec2c4d89d50b5b21a8bb1996bbab68ab" dependencies = [ "bytecheck", ] @@ -3862,10 +4030,12 @@ dependencies = [ "serde_urlencoded", "tokio", "tokio-rustls", + "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "webpki-roots", "winreg", @@ -3914,9 +4084,9 @@ dependencies = [ [[package]] name = "rkyv" -version = "0.7.39" +version = "0.7.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"cec2b3485b07d96ddfd3134767b8a447b45ea4eb91448d0a35180ec0ffd5ed15" +checksum = "c30f1d45d9aa61cbc8cd1eb87705470892289bb2d01943e7803b873a57404dc3" dependencies = [ "bytecheck", "hashbrown 0.12.3", @@ -3928,9 +4098,9 @@ dependencies = [ [[package]] name = "rkyv_derive" -version = "0.7.39" +version = "0.7.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eaedadc88b53e36dd32d940ed21ae4d850d5916f2581526921f553a72ac34c4" +checksum = "ff26ed6c7c4dfc2aa9480b86a60e3c7233543a270a680e10758a507c5a4ce476" dependencies = [ "proc-macro2", "quote", @@ -4002,11 +4172,11 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.28.0" +version = "1.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe32e8c89834541077a5c5bbe5691aa69324361e27e6aeb3552a737db4a70c8" +checksum = "e13cf35f7140155d02ba4ec3294373d513a3c7baa8364c162b030e33c61520a8" dependencies = [ - "arrayvec", + "arrayvec 0.7.2", "borsh", "bytecheck", "byteorder", @@ -4047,16 +4217,16 @@ dependencies = [ [[package]] name = "rustix" -version = "0.36.7" +version = "0.36.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4fdebc4b395b7fbb9ab11e462e20ed9051e7b16e42d24042c776eca0ac81b03" +checksum = "f43abb88211988493c1abb44a70efa56ff0ce98f233b7b276146f1f3f7ba9644" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -4143,6 +4313,12 @@ dependencies = [ "parking_lot 0.12.1", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "scopeguard" version = "1.1.0" @@ -4217,7 +4393,7 @@ dependencies = [ "regex", "sea-schema", "tracing", - "tracing-subscriber", + "tracing-subscriber 0.3.16", "url", ] @@ -4248,7 +4424,7 @@ dependencies = [ "sea-orm-cli", "sea-schema", "tracing", - "tracing-subscriber", + "tracing-subscriber 0.3.16", ] [[package]] @@ -4288,7 +4464,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "63f62030c60f3a691f5fe251713b4e220b306e50a71e1d6f9cce1f24bb781978" dependencies = [ - "heck 0.4.0", + "heck 0.4.1", "proc-macro2", "quote", "syn", @@ -4371,9 +4547,9 @@ dependencies = [ [[package]] name = "send_wrapper" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "930c0acf610d3fdb5e2ab6213019aaa04e227ebe9547b0649ba599b16d788bd7" +checksum = "cd0b0ec5f1c1ca621c432a25813d8d60c88abe6d3e08a3eb9cf37d97a0fe3d73" [[package]] name = "sentry" @@ -4518,7 +4694,7 @@ version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cad406b69c91885b5107daf2c29572f6c8cdb3c66826821e286c533490c0bc76" dependencies = [ - "itoa 1.0.5", + "itoa", "ryu", "serde", ] @@ -4548,12 +4724,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c1a4ca38f4e746460d1dbd3711b8ca8ae314d1b21247edeff61dd20325b5a6f" dependencies = [ "heapless", - "nom", + "nom 7.1.3", "serde", "serde_plain", "thiserror", ] +[[package]] +name = "serde_qs" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cac3f1e2ca2fe333923a1ae72caca910b98ed0630bb35ef6f8c8517d6e81afa" +dependencies = [ + "percent-encoding", + "serde", + "thiserror", +] + [[package]] name = "serde_spanned" version = "0.6.1" @@ -4570,7 +4757,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ "form_urlencoded", - "itoa 1.0.5", + "itoa", "ryu", "serde", ] @@ -4723,6 +4910,27 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +[[package]] +name = "snafu" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eab12d3c261b2308b0d80c26fffb58d17eba81a4be97890101f416b478c79ca7" +dependencies = [ + "doc-comment", + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "socket2" version = "0.4.7" @@ -4754,9 +4962,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" [[package]] name = "spin" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f6002a767bff9e83f8eeecf883ecb8011875a21ae8da43bffb817a57e78cc09" +checksum = "7dccf47db1b41fa1573ed27ccf5e08e3ca771cb994f776668c5ebda893b248fc" dependencies = [ "lock_api", ] @@ -4788,7 +4996,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e" dependencies = [ "itertools", - "nom", + "nom 7.1.3", "unicode_categories", ] @@ -4828,7 +5036,7 @@ dependencies = [ "hashlink", "hex", "indexmap", - "itoa 1.0.5", + "itoa", "libc", "log", "memchr", @@ -4865,7 +5073,7 @@ checksum = "b850fa514dc11f2ee85be9d055c512aa866746adfacd1cb42d867d68e6a5b0d9" dependencies = [ "dotenvy", "either", - "heck 0.4.0", + "heck 0.4.1", "once_cell", "proc-macro2", "quote", @@ -4943,7 +5151,7 @@ version = "0.24.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" dependencies = [ - "heck 0.4.0", + "heck 0.4.1", "proc-macro2", "quote", "rustversion", @@ -4958,9 +5166,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "svm-rs" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e18bbb2b229a2cc0d8ba58603adb0e460ad49a3451b1540fd6f7a5d37fd03b80" +checksum = "b1b8e811a6443e8d93665a5e532efa8429ea8e2052a234a82e2cd69478913310" dependencies = [ "anyhow", "cfg-if", @@ -5107,10 +5315,11 @@ dependencies = [ [[package]] name = "thread_local" -version = "1.1.4" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" dependencies = [ + "cfg-if", "once_cell", ] @@ -5131,7 +5340,7 @@ version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" dependencies = [ - "itoa 1.0.5", + "itoa", "serde", "time-core", "time-macros", @@ -5259,6 +5468,20 @@ dependencies = [ "tungstenite 0.18.0", ] +[[package]] +name = "tokio-uring" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d5e02bb137e030b3a547c65a3bd2f1836d66a97369fdcc69034002b10e155ef" 
+dependencies = [ + "io-uring", + "libc", + "scoped-tls", + "slab", + "socket2", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.7" @@ -5311,9 +5534,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.18.0" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "729bfd096e40da9c001f778f5cdecbd2957929a24e10e5883d9392220a751581" +checksum = "56c59d8dd7d0dcbc6428bf7aa2f0e823e26e43b3c9aca15bbc9475d23e5fa12b" dependencies = [ "indexmap", "nom8", @@ -5429,6 +5652,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" dependencies = [ "once_cell", + "valuable", ] [[package]] @@ -5441,13 +5665,45 @@ dependencies = [ "tracing", ] +[[package]] +name = "tracing-serde" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e0d2eaa99c3c2e41547cfa109e910a68ea03823cccad4a0525dcbc9b01e8c71" +dependencies = [ + "ansi_term", + "chrono", + "lazy_static", + "matchers 0.0.1", + "parking_lot 0.11.2", + "regex", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-serde", +] + [[package]] name = "tracing-subscriber" version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a6176eae26dd70d0c919749377897b54a9276bd7061339665dd68777926b5a70" dependencies = [ - "matchers", + "matchers 0.1.0", "once_cell", "regex", "sharded-slab", @@ -5664,6 +5920,12 @@ dependencies = [ "serde", ] +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" @@ -5781,6 +6043,19 @@ version = "0.2.84" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" +[[package]] +name = "wasm-streams" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bbae3363c08332cadccd13b67db371814cd214c2524020932f0804b8cf7c078" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wasm-timer" version = "0.2.5" @@ -5833,6 +6108,7 @@ dependencies = [ "hashbrown 0.13.2", "hdrhistogram", "http", + "influxdb2", "ipnet", "itertools", "log", @@ -5861,6 +6137,7 @@ dependencies = [ "time 0.3.20", "tokio", "tokio-stream", + "tokio-uring", "toml 0.7.2", "tower", "tower-http 0.4.0", @@ -6035,13 +6312,14 @@ dependencies = [ [[package]] name = "ws_stream_wasm" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47ca1ab42f5afed7fc332b22b6e932ca5414b209465412c8cdf0ad23bc0de645" +checksum = "7999f5f4217fe3818726b66257a4475f71e74ffd190776ad053fa159e50737f5" dependencies = [ "async_io_stream", "futures", "js-sys", + "log", "pharos", "rustc_version", "send_wrapper", @@ -6051,6 +6329,12 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wyz" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" + [[package]] name = "wyz" version = "0.5.1" @@ -6074,9 +6358,9 @@ checksum = "c394b5bd0c6f669e7275d9c20aa90ae064cb22e75a1cad54e1b34088034b149f" [[package]] name = "zip" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "537ce7411d25e54e8ae21a7ce0b15840e7bfcff15b51d697ec3266cc76bdf080" +checksum = "0445d0fbc924bb93539b4316c11afb121ea39296f99a3c4c9edad09e3658cdef" dependencies = [ "aes 0.7.5", "byteorder", @@ -6113,9 +6397,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.5+zstd.1.5.2" +version = "2.0.7+zstd.1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edc50ffce891ad571e9f9afe5039c4837bede781ac4bb13052ed7ae695518596" +checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5" dependencies = [ "cc", "libc", diff --git a/Dockerfile b/Dockerfile index 9e1a5400..d2fb69c1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -33,11 +33,12 @@ RUN --mount=type=cache,target=/usr/local/cargo/registry \ RUN --mount=type=cache,target=/usr/local/cargo/registry \ --mount=type=cache,target=/app/target \ cargo install \ + --features tokio-uring \ --locked \ --no-default-features \ + --path ./web3_proxy \ --profile faster_release \ - --root /opt/bin \ - --path ./web3_proxy + --root /opt/bin # # We do not need the Rust toolchain to run the binary! diff --git a/TODO.md b/TODO.md index 6b2ad470..759abb7b 100644 --- a/TODO.md +++ b/TODO.md @@ -369,6 +369,36 @@ These are not yet ordered. There might be duplicates. We might not actually need - have a blocking future watching the config file and calling app.apply_config() on first load and on change - work started on this in the "config_reloads" branch. because of how we pass channels around during spawn, this requires a larger refactor. - [-] if we subscribe to a server that is syncing, it gives us null block_data_limit. when it catches up, we don't ever send queries to it. we need to recheck block_data_limit +- [ ] don't use new_head_provider anywhere except new head subscription +- [x] remove the "metered" crate now that we save aggregate queries? +- [x] don't use systemtime. use chrono +- [x] graceful shutdown + - [x] frontend needs to shut down first. this will stop serving requests on /health and so new requests should quickly stop being routed to us + - [x] when frontend has finished, tell all the other tasks to stop + - [x] stats buffer needs to flush to both the database and influxdb +- [x] `rpc_accounting` script +- [x] period_datetime should always round to the start of the minute. this will ensure aggregations use as few rows as possible +- [x] weighted random choice should still prioritize non-archive servers + - maybe shuffle randomly and then sort by (block_limit, random_index)? + - maybe sum available_requests grouped by archive/non-archive. only limit to non-archive if they have enough? +- [x] if we subscribe to a server that is syncing, it gives us null block_data_limit. when it catches up, we don't ever send queries to it. we need to recheck block_data_limit +- [x] add a "backup" tier that is only used if balanced_rpcs has "no servers synced" + - use this tier to check timestamp on latest block. 
if we are behind that by more than a few seconds, something is wrong +- [x] `change_user_tier_by_address` script +- [x] emit stats for user's successes, retries, failures, with the types of requests, chain, rpc +- [x] add caching to speed up stat queries +- [x] config parsing is strict right now. this makes it hard to deploy on git push since configs need to change along with it + - changed to only emit a warning if there is an unknown configuration key +- [x] make the "not synced" error more verbose +- [x] short lived cache on /health +- [x] cache /status for longer +- [x] sort connections during eth_sendRawTransaction +- [x] block all admin_ rpc commands +- [x] remove the "metered" crate now that we save aggregate queries? +- [x] add archive depth to app config +- [x] improve "archive_needed" boolean. change to "block_depth" +- [x] keep score of new_head timings for all rpcs +- [x] having the whole block in /status is very verbose. trim it down - [-] proxy mode for benchmarking all backends - [-] proxy mode for sending to multiple backends - [-] let users choose a % of reverts to log (or maybe x/second). someone like curve logging all reverts will be a BIG database very quickly @@ -391,7 +421,15 @@ These are not yet ordered. There might be duplicates. We might not actually need - [ ] maybe we shouldn't route eth_getLogs to syncing nodes. serving queries slows down sync significantly - change the send_best function to only include servers that are at least close to fully synced - [ ] have private transactions be enabled by a url setting rather than a setting on the key +- [ ] enable mev protected transactions with either a /protect/ url (instead of /private/) or the database (when on /rpc/) - [ ] cli for adding rpc keys to an existing user +- [ ] rename "private" to "mev protected" to avoid confusion about private transactions being public once they are mined +- [ ] allow restricting an rpc key to specific chains +- [ ] writes to request_latency should be handled by a background task so they don't slow down the request + - maybe we can use https://docs.rs/hdrhistogram/latest/hdrhistogram/sync/struct.SyncHistogram.html +- [ ] keep re-broadcasting transactions until they are confirmed +- [ ] if mev protection is disabled, we should send to *both* balanced_rpcs *and* private_rps +- [ ] if mev protection is enabled, we should sent to *only* private_rpcs - [ ] rate limiting/throttling on query_user_stats - [ ] web3rpc configs should have a max_concurrent_requests - will probably want a tool for calculating a safe value for this. too low and we could kill our performance @@ -400,44 +438,45 @@ These are not yet ordered. There might be duplicates. We might not actually need - [ ] setting request limits to None is broken. it does maxu64 and then internal deferred rate limiter counts try to *99/100 - [ ] if kafka fails to connect at the start, automatically reconnect - [ ] during shutdown, mark the proxy unhealthy and send unsubscribe responses for any open websocket subscriptions +- [ ] setting request limits to None is broken. it does maxu64 and then internal deferred rate limiter counts overflows when it does to `x*99/100` +- [ ] during shutdown, send unsubscribe responses for any open websocket subscriptions - [ ] some chains still use total_difficulty. 
have total_difficulty be used only if the chain needs it - if total difficulty is not on the block and we aren't on ETH, fetch the full block instead of just the header - if total difficulty is set and non-zero, use it for consensus instead of just the number - [ ] query_user_stats cache hit rate - [ ] need debounce on reconnect. websockets are closing on us and then we reconnect twice. locks on ProviderState need more thought -- [ ] having the whole block in status is very verbose. trim it down -- [ ] `cost estimate` script - - sum bytes and number of requests. prompt hosting costs. divide -- [ ] `stat delay` script - - query database for newest stat -- [ ] period_datetime should always be :00. right now it depends on start time +- [ ] having the whole block in /status is very verbose. trim it down - [ ] we have our hard rate limiter set up with a period of 60. but most providers have period of 1- [ ] two servers running will confuse rpc_accounting! - it won't happen with users often because they should be sticky to one proxy, but unauthenticated users will definitely hit this - one option: we need the insert to be an upsert, but how do we merge historgrams? - [ ] don't use systemtime. use chrono +- [ ] soft limit needs more thought + - it should be the min of total_sum_soft_limit (from only non-lagged servers) and min_sum_soft_limit + - otherwise it won't track anything and will just give errors. + - but if web3 proxy has just started, we should give some time otherwise we will thundering herd the first server that responds - [ ] connection pool for websockets. use tokio-tungstenite directly. no need for ethers providers since serde_json is enough for us - this should also get us closer to being able to do our own streaming json parser where we can -- [ ] get `oldest_allowed` out of config. or calculate automatically based on block time. -- [ ] `change_user_tier_by_address` script - [ ] figure out if "could not get block from params" is a problem worth logging - maybe it was an ots request? -- [ ] eth_subscribe rpc_accounting has everything as cache_hits. should we instead count it as one background request? +- [ ] change redirect_rpc_key_url to match the newest url scheme - [ ] implement filters - [ ] implement remaining subscriptions - would be nice if our subscriptions had better gaurentees than geth/erigon do, but maybe simpler to just setup a broadcast channel and proxy all the respones to a backend instead - [ ] tests should use `test-env-log = "0.2.8"` -- [ ] weighted random choice should still prioritize non-archive servers - - maybe shuffle randomly and then sort by (block_limit, random_index)? - - maybe sum available_requests grouped by archive/non-archive. only limit to non-archive if they have enough? - [ ] some places we call it "accounting" others a "stat". be consistent - [ ] cli commands to search users by key - [ ] flamegraphs show 25% of the time to be in moka-housekeeper. tune that - [ ] config parsing is strict right now. this makes it hard to deploy on git push since configs need to change along with it - [ ] when displaying the user's data, they just see an opaque id for their tier. We should join that data +- [ ] refactor so configs can change while running + - this will probably be a rather large change, but is necessary when we have autoscaling + - create the app without applying any config to it + - have a blocking future watching the config file and calling app.apply_config() on first load and on change + - work started on this in the "config_reloads" branch. 
because of how we pass channels around during spawn, this requires a larger refactor. +- [ ] when displaying the user's data, they just see an opaque id for their tier. We should join that data so they see the tier name and limits - [ ] add indexes to speed up stat queries - [ ] the public rpc is rate limited by ip and the authenticated rpc is rate limit by key - this means if a dapp uses the authenticated RPC on their website, they could get rate limited more easily -- [ ] add cacheing to speed up stat queries - [ ] take an option to set a non-default role when creating a user - [ ] different prune levels for free tiers - [ ] have a test that runs ethspam and versus @@ -451,14 +490,10 @@ These are not yet ordered. There might be duplicates. We might not actually need - [ ] after running for a while, https://eth-ski.llamanodes.com/status is only at 157 blocks and hashes. i thought they would be near 10k after running for a while - adding uptime to the status should help - i think this is already in our todo list -- [ ] improve private transactions. keep re-broadcasting until they are confirmed - [ ] write a test that uses the cli to create a user and modifies their key - [ ] Uuid/Ulid instead of big_unsigned for database ids - might have to use Uuid in sea-orm and then convert to Ulid on display - https://www.kostolansky.sk/posts/how-to-migrate-to-uuid/ -- [ ] make the "not synced" error more verbose - - I think there is a bug in our synced_rpcs filtering. likely in has_block_data - - seeing "not synced" when I load https://vfat.tools/esd/ - [ ] emit stdandard deviation? - [ ] emit global stat on retry - [ ] emit global stat on no servers synced @@ -510,12 +545,11 @@ These are not yet ordered. There might be duplicates. We might not actually need - [ ] nice output when cargo doc is run - [ ] cache more things locally or in redis - [ ] stats when forks are resolved (and what chain they were on?) -- [ ] emit stats for user's successes, retries, failures, with the types of requests, chain, rpc - [ ] Only subscribe to transactions when someone is listening and if the server has opted in to it - [ ] When sending eth_sendRawTransaction, retry errors - [ ] If we need an archive server and no servers in sync, exit immediately with an error instead of waiting 60 seconds - [ ] 120 second timeout is too short. Maybe do that for free tier and larger timeout for paid. Problem is that some queries can take over 1000 seconds -- [ ] when handling errors from axum parsing the Json...Enum, the errors don't get wrapped in json. i think we need a axum::Layer +- [ ] when handling errors from axum parsing the Json...Enum in the function signature, the errors don't get wrapped in json. i think we need a axum::Layer - [ ] don't "unwrap" anywhere. give proper errors - [ ] handle log subscriptions - probably as a paid feature @@ -546,6 +580,11 @@ These are not yet ordered. There might be duplicates. We might not actually need The above methods return Entry type, which provides is_fresh method to check if the value was freshly computed or already existed in the cache. - [ ] lag message always shows on first response - http interval on blastapi lagging by 1! +- [ ] change scoring for rpcs again. "p2c ewma" + - [ ] weighted random sort: (soft_limit - ewma active requests * num web3_proxy servers) + - 2. soft_limit + - [ ] pick 2 servers from the random sort. 
+ - [ ] exponential weighted moving average for block subscriptions of time behind the first server (works well for ws but not http) ## V2 @@ -690,9 +729,13 @@ in another repo: event subscriber - [ ] have an upgrade tier that queries multiple backends at once. returns on first Ok result, collects errors. if no Ok, find the most common error and then respond with that - [ ] give public_recent_ips_salt a better, more general, name - [ ] include tier in the head block logs? +<<<<<<< HEAD - [ ] i think i use FuturesUnordered when a try_join_all might be better - [ ] since we are read-heavy on our configs, maybe we should use a cache - "using a thread local storage and explicit types" https://docs.rs/arc-swap/latest/arc_swap/cache/struct.Cache.html - [ ] tests for config reloading - [ ] use pin instead of arc for a bunch of things? - https://fasterthanli.me/articles/pin-and-suffering +======= +- [ ] calculate archive depth automatically based on block_data_limits +>>>>>>> 77df3fa (stats v2) diff --git a/config/example.toml b/config/example.toml index d72147ba..7c61b8f5 100644 --- a/config/example.toml +++ b/config/example.toml @@ -13,6 +13,11 @@ db_replica_url = "mysql://root:dev_web3_proxy@127.0.0.1:13306/dev_web3_proxy" kafka_urls = "127.0.0.1:19092" +# a timeseries database is optional. it is used for making pretty graphs +influxdb_host = "http://127.0.0.1:18086" +influxdb_org = "dev_org" +influxdb_token = "dev_web3_proxy_auth_token" + # thundering herd protection # only mark a block as the head block if the sum of their soft limits is greater than or equal to min_sum_soft_limit min_sum_soft_limit = 2_000 diff --git a/entities/src/login.rs b/entities/src/login.rs index f4af45a3..92c2df5e 100644 --- a/entities/src/login.rs +++ b/entities/src/login.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.6 +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7 use crate::serialization; use sea_orm::entity::prelude::*; diff --git a/entities/src/mod.rs b/entities/src/mod.rs index 2121477c..fccd7e86 100644 --- a/entities/src/mod.rs +++ b/entities/src/mod.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.6 +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7 pub mod prelude; @@ -8,6 +8,7 @@ pub mod login; pub mod pending_login; pub mod revert_log; pub mod rpc_accounting; +pub mod rpc_accounting_v2; pub mod rpc_key; pub mod sea_orm_active_enums; pub mod secondary_user; diff --git a/entities/src/pending_login.rs b/entities/src/pending_login.rs index 196b851c..c162aaa9 100644 --- a/entities/src/pending_login.rs +++ b/entities/src/pending_login.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7 use crate::serialization; use sea_orm::entity::prelude::*; diff --git a/entities/src/prelude.rs b/entities/src/prelude.rs index bb19388d..a4dda8b1 100644 --- a/entities/src/prelude.rs +++ b/entities/src/prelude.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.10.7 pub use super::admin::Entity as Admin; pub use super::admin_trail::Entity as AdminTrail; @@ -6,6 +6,7 @@ pub use super::login::Entity as Login; pub use super::pending_login::Entity as PendingLogin; pub use super::revert_log::Entity as RevertLog; pub use super::rpc_accounting::Entity as RpcAccounting; +pub use super::rpc_accounting_v2::Entity as RpcAccountingV2; pub use super::rpc_key::Entity as RpcKey; pub use super::secondary_user::Entity as SecondaryUser; pub use super::user::Entity as User; diff --git a/entities/src/revert_log.rs b/entities/src/revert_log.rs index 9835ef8e..20cfc4e9 100644 --- a/entities/src/revert_log.rs +++ b/entities/src/revert_log.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7 use super::sea_orm_active_enums::Method; use crate::serialization; diff --git a/entities/src/rpc_accounting.rs b/entities/src/rpc_accounting.rs index e96ba0d8..a615e6ff 100644 --- a/entities/src/rpc_accounting.rs +++ b/entities/src/rpc_accounting.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7 use sea_orm::entity::prelude::*; use serde::{Deserialize, Serialize}; diff --git a/entities/src/rpc_accounting_v2.rs b/entities/src/rpc_accounting_v2.rs new file mode 100644 index 00000000..8fef415d --- /dev/null +++ b/entities/src/rpc_accounting_v2.rs @@ -0,0 +1,47 @@ +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7 + +use sea_orm::entity::prelude::*; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq, Serialize, Deserialize)] +#[sea_orm(table_name = "rpc_accounting_v2")] +pub struct Model { + #[sea_orm(primary_key)] + pub id: u64, + pub rpc_key_id: Option, + pub chain_id: u64, + pub period_datetime: DateTimeUtc, + pub method: Option, + pub origin: Option, + pub archive_needed: bool, + pub error_response: bool, + pub frontend_requests: u64, + pub backend_requests: u64, + pub backend_retries: u64, + pub no_servers: u64, + pub cache_misses: u64, + pub cache_hits: u64, + pub sum_request_bytes: u64, + pub sum_response_millis: u64, + pub sum_response_bytes: u64, +} + +#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] +pub enum Relation { + #[sea_orm( + belongs_to = "super::rpc_key::Entity", + from = "Column::RpcKeyId", + to = "super::rpc_key::Column::Id", + on_update = "NoAction", + on_delete = "NoAction" + )] + RpcKey, +} + +impl Related for Entity { + fn to() -> RelationDef { + Relation::RpcKey.def() + } +} + +impl ActiveModelBehavior for ActiveModel {} diff --git a/entities/src/rpc_key.rs b/entities/src/rpc_key.rs index 79a9f4bd..54102209 100644 --- a/entities/src/rpc_key.rs +++ b/entities/src/rpc_key.rs @@ -1,6 +1,6 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.10.7 -use super::sea_orm_active_enums::LogLevel; +use super::sea_orm_active_enums::TrackingLevel; use crate::serialization; use sea_orm::entity::prelude::*; use serde::{Deserialize, Serialize}; @@ -26,7 +26,8 @@ pub struct Model { #[sea_orm(column_type = "Text", nullable)] pub allowed_user_agents: Option, pub log_revert_chance: f64, - pub log_level: LogLevel, + // TODO: rename this with a migration + pub log_level: TrackingLevel, } #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] @@ -35,6 +36,8 @@ pub enum Relation { RevertLog, #[sea_orm(has_many = "super::rpc_accounting::Entity")] RpcAccounting, + #[sea_orm(has_many = "super::rpc_accounting_v2::Entity")] + RpcAccountingV2, #[sea_orm( belongs_to = "super::user::Entity", from = "Column::UserId", @@ -57,6 +60,12 @@ impl Related for Entity { } } +impl Related for Entity { + fn to() -> RelationDef { + Relation::RpcAccountingV2.def() + } +} + impl Related for Entity { fn to() -> RelationDef { Relation::User.def() diff --git a/entities/src/sea_orm_active_enums.rs b/entities/src/sea_orm_active_enums.rs index 593e2c17..7882cf35 100644 --- a/entities/src/sea_orm_active_enums.rs +++ b/entities/src/sea_orm_active_enums.rs @@ -1,11 +1,12 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7 use sea_orm::entity::prelude::*; use serde::{Deserialize, Serialize}; +// TODO: rename to StatLevel? AccountingLevel? What? #[derive(Debug, Clone, PartialEq, Eq, EnumIter, DeriveActiveEnum, Serialize, Deserialize)] #[sea_orm(rs_type = "String", db_type = "Enum", enum_name = "log_level")] -pub enum LogLevel { +pub enum TrackingLevel { #[sea_orm(string_value = "none")] None, #[sea_orm(string_value = "aggregated")] @@ -14,7 +15,7 @@ pub enum LogLevel { Detailed, } -impl Default for LogLevel { +impl Default for TrackingLevel { fn default() -> Self { Self::None } diff --git a/entities/src/secondary_user.rs b/entities/src/secondary_user.rs index 86c8c0e7..69b62220 100644 --- a/entities/src/secondary_user.rs +++ b/entities/src/secondary_user.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7 use super::sea_orm_active_enums::Role; use sea_orm::entity::prelude::*; diff --git a/entities/src/user.rs b/entities/src/user.rs index 48c89c08..289e1fd1 100644 --- a/entities/src/user.rs +++ b/entities/src/user.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.7 use crate::serialization; use sea_orm::entity::prelude::*; diff --git a/entities/src/user_tier.rs b/entities/src/user_tier.rs index 5d069e74..a025bc96 100644 --- a/entities/src/user_tier.rs +++ b/entities/src/user_tier.rs @@ -1,4 +1,4 @@ -//! `SeaORM` Entity. Generated by sea-orm-codegen 0.10.5 +//! `SeaORM` Entity. 
Generated by sea-orm-codegen 0.10.7 use sea_orm::entity::prelude::*; use serde::{Deserialize, Serialize}; diff --git a/migration/src/lib.rs b/migration/src/lib.rs index fe7e3ec6..cd4cbff6 100644 --- a/migration/src/lib.rs +++ b/migration/src/lib.rs @@ -17,6 +17,7 @@ mod m20230119_204135_better_free_tier; mod m20230130_124740_read_only_login_logic; mod m20230130_165144_prepare_admin_imitation_pre_login; mod m20230215_152254_admin_trail; +mod m20230125_204810_stats_v2; pub struct Migrator; @@ -41,6 +42,7 @@ impl MigratorTrait for Migrator { Box::new(m20230130_124740_read_only_login_logic::Migration), Box::new(m20230130_165144_prepare_admin_imitation_pre_login::Migration), Box::new(m20230215_152254_admin_trail::Migration), + Box::new(m20230125_204810_stats_v2::Migration), ] } } diff --git a/migration/src/m20230125_204810_stats_v2.rs b/migration/src/m20230125_204810_stats_v2.rs new file mode 100644 index 00000000..ef9250e8 --- /dev/null +++ b/migration/src/m20230125_204810_stats_v2.rs @@ -0,0 +1,157 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + +#[async_trait::async_trait] +impl MigrationTrait for Migration { + async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .create_table( + Table::create() + .table(RpcAccountingV2::Table) + .col( + ColumnDef::new(RpcAccountingV2::Id) + .big_unsigned() + .not_null() + .auto_increment() + .primary_key(), + ) + .col( + ColumnDef::new(RpcAccountingV2::RpcKeyId) + .big_unsigned() + .null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::ChainId) + .big_unsigned() + .not_null(), + ) + .col(ColumnDef::new(RpcAccountingV2::Origin).string().null()) + .col( + ColumnDef::new(RpcAccountingV2::PeriodDatetime) + .timestamp() + .not_null(), + ) + .col(ColumnDef::new(RpcAccountingV2::Method).string().null()) + .col( + ColumnDef::new(RpcAccountingV2::ArchiveNeeded) + .boolean() + .not_null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::ErrorResponse) + .boolean() + .not_null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::FrontendRequests) + .big_unsigned() + .not_null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::BackendRequests) + .big_unsigned() + .not_null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::BackendRetries) + .big_unsigned() + .not_null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::NoServers) + .big_unsigned() + .not_null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::CacheMisses) + .big_unsigned() + .not_null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::CacheHits) + .big_unsigned() + .not_null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::SumRequestBytes) + .big_unsigned() + .not_null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::SumResponseMillis) + .big_unsigned() + .not_null(), + ) + .col( + ColumnDef::new(RpcAccountingV2::SumResponseBytes) + .big_unsigned() + .not_null(), + ) + .foreign_key( + sea_query::ForeignKey::create() + .from(RpcAccountingV2::Table, RpcAccountingV2::RpcKeyId) + .to(RpcKey::Table, RpcKey::Id), + ) + .index(sea_query::Index::create().col(RpcAccountingV2::ChainId)) + .index(sea_query::Index::create().col(RpcAccountingV2::Origin)) + .index(sea_query::Index::create().col(RpcAccountingV2::PeriodDatetime)) + .index(sea_query::Index::create().col(RpcAccountingV2::Method)) + .index(sea_query::Index::create().col(RpcAccountingV2::ArchiveNeeded)) + .index(sea_query::Index::create().col(RpcAccountingV2::ErrorResponse)) + .index( + sea_query::Index::create() + .col(RpcAccountingV2::RpcKeyId) + .col(RpcAccountingV2::ChainId) + 
.col(RpcAccountingV2::Origin) + .col(RpcAccountingV2::PeriodDatetime) + .col(RpcAccountingV2::Method) + .col(RpcAccountingV2::ArchiveNeeded) + .col(RpcAccountingV2::ErrorResponse) + .unique(), + ) + .to_owned(), + ) + .await?; + + Ok(()) + } + + async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { + manager + .drop_table(Table::drop().table(RpcAccountingV2::Table).to_owned()) + .await?; + + Ok(()) + } +} + +/// Partial table definition +#[derive(Iden)] +pub enum RpcKey { + Table, + Id, +} + +#[derive(Iden)] +enum RpcAccountingV2 { + Table, + Id, + RpcKeyId, + ChainId, + Origin, + PeriodDatetime, + Method, + ArchiveNeeded, + ErrorResponse, + FrontendRequests, + BackendRequests, + BackendRetries, + NoServers, + CacheMisses, + CacheHits, + SumRequestBytes, + SumResponseMillis, + SumResponseBytes, +} diff --git a/redis-rate-limiter/Cargo.toml b/redis-rate-limiter/Cargo.toml index 9ba37ad3..959a7d48 100644 --- a/redis-rate-limiter/Cargo.toml +++ b/redis-rate-limiter/Cargo.toml @@ -6,5 +6,6 @@ edition = "2021" [dependencies] anyhow = "1.0.69" +chrono = "0.4.23" deadpool-redis = { version = "0.11.1", features = ["rt_tokio_1", "serde"] } tokio = "1.25.0" diff --git a/redis-rate-limiter/src/lib.rs b/redis-rate-limiter/src/lib.rs index 7c9ab5b3..551584ad 100644 --- a/redis-rate-limiter/src/lib.rs +++ b/redis-rate-limiter/src/lib.rs @@ -1,7 +1,6 @@ //#![warn(missing_docs)] use anyhow::Context; use std::ops::Add; -use std::time::{SystemTime, UNIX_EPOCH}; use tokio::time::{Duration, Instant}; pub use deadpool_redis::redis; @@ -48,10 +47,7 @@ impl RedisRateLimiter { pub fn now_as_secs(&self) -> f32 { // TODO: if system time doesn't match redis, this won't work great - SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("cannot tell the time") - .as_secs_f32() + (chrono::Utc::now().timestamp_millis() as f32) / 1_000.0 } pub fn period_id(&self, now_as_secs: f32) -> f32 { diff --git a/web3_proxy/Cargo.toml b/web3_proxy/Cargo.toml index 03f15b35..6446ec7b 100644 --- a/web3_proxy/Cargo.toml +++ b/web3_proxy/Cargo.toml @@ -36,6 +36,7 @@ derive_more = "0.99.17" dotenv = "0.15.0" env_logger = "0.10.0" ethers = { version = "1.0.2", default-features = false, features = ["rustls", "ws"] } +ewma = "0.1.1" fdlimit = "0.2.1" flume = "0.10.14" futures = { version = "0.3.26", features = ["thread-pool"] } @@ -45,6 +46,7 @@ handlebars = "4.3.6" hashbrown = { version = "0.13.2", features = ["serde"] } hdrhistogram = "7.5.2" http = "0.2.9" +influxdb2 = { version = "0.3", features = ["rustls"], default-features = false } ipnet = "2.7.1" itertools = "0.10.5" log = "0.4.17" @@ -52,6 +54,7 @@ moka = { version = "0.10.0", default-features = false, features = ["future"] } num = "0.4.0" num-traits = "0.2.15" once_cell = { version = "1.17.1" } +ordered-float = "3.4.0" pagerduty-rs = { version = "0.1.6", default-features = false, features = ["async", "rustls", "sync"] } parking_lot = { version = "0.12.1", features = ["arc_lock"] } prettytable = "*" @@ -69,11 +72,10 @@ siwe = "0.5.0" time = "0.3.20" tokio = { version = "1.25.0", features = ["full"] } tokio-stream = { version = "0.1.12", features = ["sync"] } +tokio-uring = { version = "0.4.0", optional = true } toml = "0.7.2" tower = "0.4.13" tower-http = { version = "0.4.0", features = ["cors", "sensitive-headers"] } ulid = { version = "1.0.0", features = ["serde"] } url = "2.3.1" uuid = "1.3.0" -ewma = "0.1.1" -ordered-float = "3.4.0" diff --git a/web3_proxy/src/admin_queries.rs b/web3_proxy/src/admin_queries.rs index 85ec37b7..8538a691 100644 --- 
a/web3_proxy/src/admin_queries.rs +++ b/web3_proxy/src/admin_queries.rs @@ -1,6 +1,6 @@ use crate::app::Web3ProxyApp; use crate::frontend::errors::FrontendErrorResponse; -use crate::user_queries::get_user_id_from_params; +use crate::http_params::get_user_id_from_params; use anyhow::Context; use axum::response::{IntoResponse, Response}; use axum::{ diff --git a/web3_proxy/src/app/mod.rs b/web3_proxy/src/app/mod.rs index 6278b836..ae8445b5 100644 --- a/web3_proxy/src/app/mod.rs +++ b/web3_proxy/src/app/mod.rs @@ -1,7 +1,6 @@ // TODO: this file is way too big now. move things into other modules mod ws; -use crate::app_stats::{ProxyResponseStat, StatEmitter, Web3ProxyStat}; use crate::block_number::{block_needed, BlockNeeded}; use crate::config::{AppConfig, TopConfig}; use crate::frontend::authorization::{Authorization, RequestMetadata, RpcSecretKey}; @@ -10,17 +9,19 @@ use crate::frontend::rpc_proxy_ws::ProxyMode; use crate::jsonrpc::{ JsonRpcForwardedResponse, JsonRpcForwardedResponseEnum, JsonRpcRequest, JsonRpcRequestEnum, }; -use crate::rpcs::blockchain::Web3ProxyBlock; +use crate::rpcs::blockchain::{BlocksByHashCache, Web3ProxyBlock}; +use crate::rpcs::consensus::ConsensusWeb3Rpcs; use crate::rpcs::many::Web3Rpcs; use crate::rpcs::one::Web3Rpc; use crate::rpcs::transactions::TxStatus; +use crate::stats::{AppStat, RpcQueryStats, StatBuffer}; use crate::user_token::UserBearerToken; use anyhow::Context; use axum::headers::{Origin, Referer, UserAgent}; use chrono::Utc; use deferred_rate_limiter::DeferredRateLimiter; use derive_more::From; -use entities::sea_orm_active_enums::LogLevel; +use entities::sea_orm_active_enums::TrackingLevel; use entities::user; use ethers::core::utils::keccak256; use ethers::prelude::{Address, Bytes, Transaction, TxHash, H256, U64}; @@ -65,8 +66,8 @@ pub static APP_USER_AGENT: &str = concat!( env!("CARGO_PKG_VERSION") ); -/// TODO: allow customizing the request period? -pub static REQUEST_PERIOD: u64 = 60; +// aggregate across 1 week +const BILLING_PERIOD_SECONDS: i64 = 60 * 60 * 24 * 7; #[derive(Debug, From)] struct ResponseCacheKey { @@ -153,10 +154,12 @@ type ResponseCache = pub type AnyhowJoinHandle = JoinHandle>; +/// TODO: move this #[derive(Clone, Debug, Default, From)] pub struct AuthorizationChecks { /// database id of the primary user. 0 if anon /// TODO: do we need this? its on the authorization so probably not + /// TODO: Option? pub user_id: u64, /// the key used (if any) pub rpc_secret_key: Option, @@ -175,17 +178,21 @@ pub struct AuthorizationChecks { pub allowed_user_agents: Option>, /// if None, allow any IP Address pub allowed_ips: Option>, - pub log_level: LogLevel, + /// how detailed any rpc account entries should be + pub tracking_level: TrackingLevel, /// Chance to save reverting eth_call, eth_estimateGas, and eth_sendRawTransaction to the database. + /// depending on the caller, errors might be expected. this keeps us from bloating our database /// TODO: f32 would be fine pub log_revert_chance: f64, - /// if true, transactions are broadcast to private mempools. They will still be public on the blockchain! + /// if true, transactions are broadcast only to private mempools. + /// IMPORTANT! Once confirmed by a miner, they will be public on the blockchain! pub private_txs: bool, pub proxy_mode: ProxyMode, } /// Simple wrapper so that we can keep track of read only connections. /// This does no blocking of writing in the compiler! 
+/// TODO: move this #[derive(Clone)] pub struct DatabaseReplica(pub DatabaseConnection); @@ -197,38 +204,60 @@ impl DatabaseReplica { } /// The application -// TODO: this debug impl is way too verbose. make something smaller // TODO: i'm sure this is more arcs than necessary, but spawning futures makes references hard pub struct Web3ProxyApp { /// Send requests to the best server available pub balanced_rpcs: Arc, pub http_client: Option, - /// Send private requests (like eth_sendRawTransaction) to all these servers - pub private_rpcs: Option>, - response_cache: ResponseCache, - // don't drop this or the sender will stop working - // TODO: broadcast channel instead? - watch_consensus_head_receiver: watch::Receiver>, - pending_tx_sender: broadcast::Sender, + /// application config + /// TODO: this will need a large refactor to handle reloads while running. maybe use a watch::Receiver? pub config: AppConfig, + /// Send private requests (like eth_sendRawTransaction) to all these servers + /// TODO: include another type so that we can use private miner relays that do not use JSONRPC requests + pub private_rpcs: Option>, + /// track JSONRPC responses + response_cache: ResponseCache, + /// rpc clients that subscribe to newHeads use this channel + /// don't drop this or the sender will stop working + /// TODO: broadcast channel instead? + pub watch_consensus_head_receiver: watch::Receiver>, + /// rpc clients that subscribe to pendingTransactions use this channel + /// This is the Sender so that new channels can subscribe to it + pending_tx_sender: broadcast::Sender, + /// Optional database for users and accounting pub db_conn: Option, + /// Optional read-only database for users and accounting pub db_replica: Option, /// store pending transactions that we've seen so that we don't send duplicates to subscribers + /// TODO: think about this more. might be worth storing if we sent the transaction or not and using this for automatic retries pub pending_transactions: Cache, + /// rate limit anonymous users pub frontend_ip_rate_limiter: Option>, + /// rate limit authenticated users pub frontend_registered_user_rate_limiter: Option>, + /// Optional time series database for making pretty graphs that load quickly + pub influxdb_client: Option, + /// rate limit the login endpoint + /// we do this because each pending login is a row in the database pub login_rate_limiter: Option, + /// volatile cache used for rate limits + /// TODO: i think i might just delete this entirely. instead use local-only concurrency limits. pub vredis_pool: Option, - // TODO: this key should be our RpcSecretKey class, not Ulid + /// cache authenticated users so that we don't have to query the database on the hot path + // TODO: should the key be our RpcSecretKey class instead of Ulid? 
pub rpc_secret_key_cache: Cache, + /// concurrent/parallel RPC request limits for authenticated users pub registered_user_semaphores: Cache, hashbrown::hash_map::DefaultHashBuilder>, + /// concurrent/parallel request limits for anonymous users pub ip_semaphores: Cache, hashbrown::hash_map::DefaultHashBuilder>, + /// concurrent/parallel application request limits for authenticated users pub bearer_token_semaphores: Cache, hashbrown::hash_map::DefaultHashBuilder>, - pub stat_sender: Option>, pub kafka_producer: Option, + /// channel for sending stats in a background task + pub stat_sender: Option>, } /// flatten a JoinError into an anyhow error @@ -355,6 +384,7 @@ pub async fn get_migrated_db( Ok(db_conn) } +/// starting an app creates many tasks #[derive(From)] pub struct Web3ProxyAppSpawn { /// the app. probably clone this to use in other groups of handles @@ -365,6 +395,8 @@ pub struct Web3ProxyAppSpawn { pub background_handles: FuturesUnordered>, /// config changes are sent here pub new_top_config_sender: watch::Sender, + /// watch this to know when to start the app + pub consensus_connections_watcher: watch::Receiver>>, } impl Web3ProxyApp { @@ -372,8 +404,11 @@ impl Web3ProxyApp { pub async fn spawn( top_config: TopConfig, num_workers: usize, - shutdown_receiver: broadcast::Receiver<()>, + shutdown_sender: broadcast::Sender<()>, ) -> anyhow::Result { + let rpc_account_shutdown_recevier = shutdown_sender.subscribe(); + let mut background_shutdown_receiver = shutdown_sender.subscribe(); + // safety checks on the config // while i would prefer this to be in a "apply_top_config" function, that is a larger refactor // TODO: maybe don't spawn with a config at all. have all config updates come through an apply_top_config call @@ -512,20 +547,46 @@ impl Web3ProxyApp { } }; - // setup a channel for receiving stats (generally with a high cardinality, such as per-user) - // we do this in a channel so we don't slow down our response to the users - let stat_sender = if let Some(db_conn) = db_conn.clone() { - let emitter_spawn = - StatEmitter::spawn(top_config.app.chain_id, db_conn, 60, shutdown_receiver)?; + let influxdb_client = match top_config.app.influxdb_host.as_ref() { + Some(influxdb_host) => { + let influxdb_org = top_config + .app + .influxdb_org + .clone() + .expect("influxdb_org needed when influxdb_host is set"); + let influxdb_token = top_config + .app + .influxdb_token + .clone() + .expect("influxdb_token needed when influxdb_host is set"); + let influxdb_client = + influxdb2::Client::new(influxdb_host, influxdb_org, influxdb_token); + + // TODO: test the client now. having a stat for "started" can be useful on graphs to mark deploys + + Some(influxdb_client) + } + None => None, + }; + + // create a channel for receiving stats + // we do this in a channel so we don't slow down our response to the users + // stats can be saved in mysql, influxdb, both, or none + let stat_sender = if let Some(emitter_spawn) = StatBuffer::try_spawn( + top_config.app.chain_id, + db_conn.clone(), + influxdb_client.clone(), + 60, + 1, + BILLING_PERIOD_SECONDS, + rpc_account_shutdown_recevier, + )? { + // since the database entries are used for accounting, we want to be sure everything is saved before exiting important_background_handles.push(emitter_spawn.background_handle); Some(emitter_spawn.stat_sender) } else { - warn!("cannot store stats without a database connection"); - - // TODO: subscribe to the shutdown_receiver here since the stat emitter isn't running? 
- None }; @@ -644,7 +705,9 @@ impl Web3ProxyApp { .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); // prepare a Web3Rpcs to hold all our balanced connections - let (balanced_rpcs, balanced_rpcs_handle) = Web3Rpcs::spawn( + // let (balanced_rpcs, balanced_rpcs_handle) = Web3Rpcs::spawn( + // connect to the load balanced rpcs + let (balanced_rpcs, balanced_handle, consensus_connections_watcher) = Web3Rpcs::spawn( top_config.app.chain_id, db_conn.clone(), http_client.clone(), @@ -659,7 +722,7 @@ impl Web3ProxyApp { .await .context("spawning balanced rpcs")?; - app_handles.push(balanced_rpcs_handle); + app_handles.push(balanced_handle); // prepare a Web3Rpcs to hold all our private connections // only some chains have this, so this is optional @@ -668,7 +731,9 @@ impl Web3ProxyApp { None } else { // TODO: do something with the spawn handle - let (private_rpcs, private_rpcs_handle) = Web3Rpcs::spawn( + // TODO: Merge + // let (private_rpcs, private_rpcs_handle) = Web3Rpcs::spawn( + let (private_rpcs, private_handle, _) = Web3Rpcs::spawn( top_config.app.chain_id, db_conn.clone(), http_client.clone(), @@ -689,7 +754,7 @@ impl Web3ProxyApp { .await .context("spawning private_rpcs")?; - app_handles.push(private_rpcs_handle); + app_handles.push(private_handle); Some(private_rpcs) }; @@ -709,6 +774,7 @@ impl Web3ProxyApp { login_rate_limiter, db_conn, db_replica, + influxdb_client, vredis_pool, rpc_secret_key_cache, bearer_token_semaphores, @@ -745,14 +811,26 @@ impl Web3ProxyApp { app_handles.push(config_handle); } +// ======= +// if important_background_handles.is_empty() { +// info!("no important background handles"); +// +// let f = tokio::spawn(async move { +// let _ = background_shutdown_receiver.recv().await; +// +// Ok(()) +// }); +// +// important_background_handles.push(f); +// >>>>>>> 77df3fa (stats v2) Ok(( app, app_handles, important_background_handles, new_top_config_sender, - ) - .into()) + consensus_connections_watcher + ).into()) } pub async fn apply_top_config(&self, new_top_config: TopConfig) -> anyhow::Result<()> { @@ -786,6 +864,7 @@ impl Web3ProxyApp { // TODO: what globals? should this be the hostname or what? // globals.insert("service", "web3_proxy"); + // TODO: this needs a refactor to get HELP and TYPE into the serialized text #[derive(Default, Serialize)] struct UserCount(i64); @@ -1069,7 +1148,6 @@ impl Web3ProxyApp { } } - // #[measure([ErrorCount, HitCount, ResponseTime, Throughput])] async fn proxy_cached_request( self: &Arc, authorization: &Arc, @@ -1078,7 +1156,7 @@ impl Web3ProxyApp { ) -> Result<(JsonRpcForwardedResponse, Vec>), FrontendErrorResponse> { // trace!("Received request: {:?}", request); - let request_metadata = Arc::new(RequestMetadata::new(REQUEST_PERIOD, request.num_bytes())?); + let request_metadata = Arc::new(RequestMetadata::new(request.num_bytes())?); let mut kafka_stuff = None; @@ -1216,7 +1294,7 @@ impl Web3ProxyApp { | "shh_post" | "shh_uninstallFilter" | "shh_version") => { - // TODO: client error stat + // i don't think we will ever support these methods // TODO: what error code? return Ok(( JsonRpcForwardedResponse::from_string( @@ -1235,9 +1313,10 @@ impl Web3ProxyApp { | "eth_newPendingTransactionFilter" | "eth_pollSubscriptions" | "eth_uninstallFilter") => { - // TODO: unsupported command stat + // TODO: unsupported command stat. use the count to prioritize new features // TODO: what error code? return Ok(( + // TODO: what code? 
JsonRpcForwardedResponse::from_string( format!("not yet implemented: {}", method), None, @@ -1712,7 +1791,7 @@ impl Web3ProxyApp { let rpcs = request_metadata.backend_requests.lock().clone(); if let Some(stat_sender) = self.stat_sender.as_ref() { - let response_stat = ProxyResponseStat::new( + let response_stat = RpcQueryStats::new( method.to_string(), authorization.clone(), request_metadata, @@ -1735,7 +1814,7 @@ impl Web3ProxyApp { let rpcs = request_metadata.backend_requests.lock().clone(); if let Some(stat_sender) = self.stat_sender.as_ref() { - let response_stat = ProxyResponseStat::new( + let response_stat = RpcQueryStats::new( request_method, authorization.clone(), request_metadata, diff --git a/web3_proxy/src/app/ws.rs b/web3_proxy/src/app/ws.rs index b125a5fa..b69cdcc9 100644 --- a/web3_proxy/src/app/ws.rs +++ b/web3_proxy/src/app/ws.rs @@ -1,11 +1,11 @@ //! Websocket-specific functions for the Web3ProxyApp -use super::{Web3ProxyApp, REQUEST_PERIOD}; -use crate::app_stats::ProxyResponseStat; +use super::Web3ProxyApp; use crate::frontend::authorization::{Authorization, RequestMetadata}; use crate::jsonrpc::JsonRpcForwardedResponse; use crate::jsonrpc::JsonRpcRequest; use crate::rpcs::transactions::TxStatus; +use crate::stats::RpcQueryStats; use anyhow::Context; use axum::extract::ws::Message; use ethers::prelude::U64; @@ -33,8 +33,7 @@ impl Web3ProxyApp { .context("finding request size")? .len(); - let request_metadata = - Arc::new(RequestMetadata::new(REQUEST_PERIOD, request_bytes).unwrap()); + let request_metadata = Arc::new(RequestMetadata::new(request_bytes).unwrap()); let (subscription_abort_handle, subscription_registration) = AbortHandle::new_pair(); @@ -68,8 +67,7 @@ impl Web3ProxyApp { }; // TODO: what should the payload for RequestMetadata be? - let request_metadata = - Arc::new(RequestMetadata::new(REQUEST_PERIOD, 0).unwrap()); + let request_metadata = Arc::new(RequestMetadata::new(0).unwrap()); // TODO: make a struct for this? using our JsonRpcForwardedResponse won't work because it needs an id let response_json = json!({ @@ -97,7 +95,7 @@ impl Web3ProxyApp { }; if let Some(stat_sender) = stat_sender.as_ref() { - let response_stat = ProxyResponseStat::new( + let response_stat = RpcQueryStats::new( "eth_subscription(newHeads)".to_string(), authorization.clone(), request_metadata.clone(), @@ -135,8 +133,7 @@ impl Web3ProxyApp { // TODO: do something with this handle? tokio::spawn(async move { while let Some(Ok(new_tx_state)) = pending_tx_receiver.next().await { - let request_metadata = - Arc::new(RequestMetadata::new(REQUEST_PERIOD, 0).unwrap()); + let request_metadata = Arc::new(RequestMetadata::new(0).unwrap()); let new_tx = match new_tx_state { TxStatus::Pending(tx) => tx, @@ -169,7 +166,7 @@ impl Web3ProxyApp { }; if let Some(stat_sender) = stat_sender.as_ref() { - let response_stat = ProxyResponseStat::new( + let response_stat = RpcQueryStats::new( "eth_subscription(newPendingTransactions)".to_string(), authorization.clone(), request_metadata.clone(), @@ -211,8 +208,7 @@ impl Web3ProxyApp { // TODO: do something with this handle? 
tokio::spawn(async move { while let Some(Ok(new_tx_state)) = pending_tx_receiver.next().await { - let request_metadata = - Arc::new(RequestMetadata::new(REQUEST_PERIOD, 0).unwrap()); + let request_metadata = Arc::new(RequestMetadata::new(0).unwrap()); let new_tx = match new_tx_state { TxStatus::Pending(tx) => tx, @@ -246,7 +242,7 @@ impl Web3ProxyApp { }; if let Some(stat_sender) = stat_sender.as_ref() { - let response_stat = ProxyResponseStat::new( + let response_stat = RpcQueryStats::new( "eth_subscription(newPendingFullTransactions)".to_string(), authorization.clone(), request_metadata.clone(), @@ -288,8 +284,7 @@ impl Web3ProxyApp { // TODO: do something with this handle? tokio::spawn(async move { while let Some(Ok(new_tx_state)) = pending_tx_receiver.next().await { - let request_metadata = - Arc::new(RequestMetadata::new(REQUEST_PERIOD, 0).unwrap()); + let request_metadata = Arc::new(RequestMetadata::new(0).unwrap()); let new_tx = match new_tx_state { TxStatus::Pending(tx) => tx, @@ -323,7 +318,7 @@ impl Web3ProxyApp { }; if let Some(stat_sender) = stat_sender.as_ref() { - let response_stat = ProxyResponseStat::new( + let response_stat = RpcQueryStats::new( "eth_subscription(newPendingRawTransactions)".to_string(), authorization.clone(), request_metadata.clone(), @@ -354,7 +349,7 @@ impl Web3ProxyApp { let response = JsonRpcForwardedResponse::from_value(json!(subscription_id), id); if let Some(stat_sender) = self.stat_sender.as_ref() { - let response_stat = ProxyResponseStat::new( + let response_stat = RpcQueryStats::new( request_json.method.clone(), authorization.clone(), request_metadata, diff --git a/web3_proxy/src/app_stats.rs b/web3_proxy/src/app_stats.rs deleted file mode 100644 index 681dfcea..00000000 --- a/web3_proxy/src/app_stats.rs +++ /dev/null @@ -1,416 +0,0 @@ -use crate::frontend::authorization::{Authorization, RequestMetadata}; -use axum::headers::Origin; -use chrono::{TimeZone, Utc}; -use derive_more::From; -use entities::rpc_accounting; -use entities::sea_orm_active_enums::LogLevel; -use hashbrown::HashMap; -use hdrhistogram::{Histogram, RecordError}; -use log::{error, info}; -use migration::sea_orm::{self, ActiveModelTrait, DatabaseConnection, DbErr}; -use std::num::NonZeroU64; -use std::sync::atomic::Ordering; -use std::sync::Arc; -use std::time::{Duration, SystemTime}; -use tokio::sync::broadcast; -use tokio::task::JoinHandle; -use tokio::time::{interval_at, Instant}; - -/// TODO: where should this be defined? -/// TODO: can we use something inside sea_orm instead? -#[derive(Debug)] -pub struct ProxyResponseStat { - authorization: Arc, - method: String, - archive_request: bool, - error_response: bool, - request_bytes: u64, - /// if backend_requests is 0, there was a cache_hit - backend_requests: u64, - response_bytes: u64, - response_millis: u64, -} - -impl ProxyResponseStat { - /// TODO: think more about this. probably rename it - fn key(&self) -> ProxyResponseAggregateKey { - // include either the rpc_key_id or the origin - let (mut rpc_key_id, origin) = match ( - self.authorization.checks.rpc_secret_key_id, - &self.authorization.origin, - ) { - (Some(rpc_key_id), _) => { - // TODO: allow the user to opt into saving the origin - (Some(rpc_key_id), None) - } - (None, Some(origin)) => { - // we save the origin for anonymous access - (None, Some(origin.clone())) - } - (None, None) => { - // TODO: what should we do here? log ip? 
i really don't want to save any ips - (None, None) - } - }; - - let method = match self.authorization.checks.log_level { - LogLevel::None => { - // No rpc_key logging. Only save fully anonymized metric - rpc_key_id = None; - // keep the method since the rpc key is not attached - Some(self.method.clone()) - } - LogLevel::Aggregated => { - // Lose the method - None - } - LogLevel::Detailed => { - // include the method - Some(self.method.clone()) - } - }; - - ProxyResponseAggregateKey { - archive_request: self.archive_request, - error_response: self.error_response, - method, - origin, - rpc_key_id, - } - } -} - -pub struct ProxyResponseHistograms { - request_bytes: Histogram, - response_bytes: Histogram, - response_millis: Histogram, -} - -impl Default for ProxyResponseHistograms { - fn default() -> Self { - // TODO: how many significant figures? - let request_bytes = Histogram::new(5).expect("creating request_bytes histogram"); - let response_bytes = Histogram::new(5).expect("creating response_bytes histogram"); - let response_millis = Histogram::new(5).expect("creating response_millis histogram"); - - Self { - request_bytes, - response_bytes, - response_millis, - } - } -} - -// TODO: think more about if we should include IP address in this -#[derive(Clone, From, Hash, PartialEq, Eq)] -struct ProxyResponseAggregateKey { - archive_request: bool, - error_response: bool, - rpc_key_id: Option, - method: Option, - /// TODO: should this be Origin or String? - origin: Option, -} - -#[derive(Default)] -pub struct ProxyResponseAggregate { - frontend_requests: u64, - backend_requests: u64, - // TODO: related to backend_requests - // backend_retries: u64, - // TODO: related to backend_requests - // no_servers: u64, - cache_misses: u64, - cache_hits: u64, - sum_request_bytes: u64, - sum_response_bytes: u64, - sum_response_millis: u64, - histograms: ProxyResponseHistograms, -} - -/// A stat that we aggregate and then store in a database. -/// For now there is just one, but I think there might be others later -#[derive(Debug, From)] -pub enum Web3ProxyStat { - Response(ProxyResponseStat), -} - -#[derive(From)] -pub struct StatEmitterSpawn { - pub stat_sender: flume::Sender, - /// these handles are important and must be allowed to finish - pub background_handle: JoinHandle>, -} - -pub struct StatEmitter { - chain_id: u64, - db_conn: DatabaseConnection, - period_seconds: u64, -} - -// TODO: impl `+=` for ProxyResponseAggregate? -impl ProxyResponseAggregate { - fn add(&mut self, stat: ProxyResponseStat) -> Result<(), RecordError> { - // a stat always come from just 1 frontend request - self.frontend_requests += 1; - - if stat.backend_requests == 0 { - // no backend request. cache hit! - self.cache_hits += 1; - } else { - // backend requests! cache miss! - self.cache_misses += 1; - - // a stat might have multiple backend requests - self.backend_requests += stat.backend_requests; - } - - self.sum_request_bytes += stat.request_bytes; - self.sum_response_bytes += stat.response_bytes; - self.sum_response_millis += stat.response_millis; - - // TODO: use `record_correct`? - self.histograms.request_bytes.record(stat.request_bytes)?; - self.histograms - .response_millis - .record(stat.response_millis)?; - self.histograms.response_bytes.record(stat.response_bytes)?; - - Ok(()) - } - - // TODO? help to turn this plus the key into a database model? 
- // TODO: take a db transaction instead so that we can batch - async fn save( - self, - chain_id: u64, - db_conn: &DatabaseConnection, - key: ProxyResponseAggregateKey, - period_timestamp: u64, - ) -> Result<(), DbErr> { - // this is a lot of variables - let period_datetime = Utc.timestamp_opt(period_timestamp as i64, 0).unwrap(); - - let request_bytes = &self.histograms.request_bytes; - - let min_request_bytes = request_bytes.min(); - let mean_request_bytes = request_bytes.mean(); - let p50_request_bytes = request_bytes.value_at_quantile(0.50); - let p90_request_bytes = request_bytes.value_at_quantile(0.90); - let p99_request_bytes = request_bytes.value_at_quantile(0.99); - let max_request_bytes = request_bytes.max(); - - let response_millis = &self.histograms.response_millis; - - let min_response_millis = response_millis.min(); - let mean_response_millis = response_millis.mean(); - let p50_response_millis = response_millis.value_at_quantile(0.50); - let p90_response_millis = response_millis.value_at_quantile(0.90); - let p99_response_millis = response_millis.value_at_quantile(0.99); - let max_response_millis = response_millis.max(); - - let response_bytes = &self.histograms.response_bytes; - - let min_response_bytes = response_bytes.min(); - let mean_response_bytes = response_bytes.mean(); - let p50_response_bytes = response_bytes.value_at_quantile(0.50); - let p90_response_bytes = response_bytes.value_at_quantile(0.90); - let p99_response_bytes = response_bytes.value_at_quantile(0.99); - let max_response_bytes = response_bytes.max(); - - // TODO: Set origin and maybe other things on this model. probably not the ip though - let aggregated_stat_model = rpc_accounting::ActiveModel { - id: sea_orm::NotSet, - // origin: sea_orm::Set(key.authorization.origin.to_string()), - rpc_key_id: sea_orm::Set(key.rpc_key_id.map(Into::into)), - origin: sea_orm::Set(key.origin.map(|x| x.to_string())), - chain_id: sea_orm::Set(chain_id), - method: sea_orm::Set(key.method), - archive_request: sea_orm::Set(key.archive_request), - error_response: sea_orm::Set(key.error_response), - period_datetime: sea_orm::Set(period_datetime), - frontend_requests: sea_orm::Set(self.frontend_requests), - backend_requests: sea_orm::Set(self.backend_requests), - // backend_retries: sea_orm::Set(self.backend_retries), - // no_servers: sea_orm::Set(self.no_servers), - cache_misses: sea_orm::Set(self.cache_misses), - cache_hits: sea_orm::Set(self.cache_hits), - - sum_request_bytes: sea_orm::Set(self.sum_request_bytes), - min_request_bytes: sea_orm::Set(min_request_bytes), - mean_request_bytes: sea_orm::Set(mean_request_bytes), - p50_request_bytes: sea_orm::Set(p50_request_bytes), - p90_request_bytes: sea_orm::Set(p90_request_bytes), - p99_request_bytes: sea_orm::Set(p99_request_bytes), - max_request_bytes: sea_orm::Set(max_request_bytes), - - sum_response_millis: sea_orm::Set(self.sum_response_millis), - min_response_millis: sea_orm::Set(min_response_millis), - mean_response_millis: sea_orm::Set(mean_response_millis), - p50_response_millis: sea_orm::Set(p50_response_millis), - p90_response_millis: sea_orm::Set(p90_response_millis), - p99_response_millis: sea_orm::Set(p99_response_millis), - max_response_millis: sea_orm::Set(max_response_millis), - - sum_response_bytes: sea_orm::Set(self.sum_response_bytes), - min_response_bytes: sea_orm::Set(min_response_bytes), - mean_response_bytes: sea_orm::Set(mean_response_bytes), - p50_response_bytes: sea_orm::Set(p50_response_bytes), - p90_response_bytes: sea_orm::Set(p90_response_bytes), 
- p99_response_bytes: sea_orm::Set(p99_response_bytes), - max_response_bytes: sea_orm::Set(max_response_bytes), - }; - - aggregated_stat_model.save(db_conn).await?; - - Ok(()) - } -} - -impl ProxyResponseStat { - pub fn new( - method: String, - authorization: Arc, - metadata: Arc, - response_bytes: usize, - ) -> Self { - let archive_request = metadata.archive_request.load(Ordering::Acquire); - let backend_requests = metadata.backend_requests.lock().len() as u64; - // let period_seconds = metadata.period_seconds; - // let period_timestamp = - // (metadata.start_datetime.timestamp() as u64) / period_seconds * period_seconds; - let request_bytes = metadata.request_bytes; - let error_response = metadata.error_response.load(Ordering::Acquire); - - // TODO: timestamps could get confused by leap seconds. need tokio time instead - let response_millis = metadata.start_instant.elapsed().as_millis() as u64; - - let response_bytes = response_bytes as u64; - - Self { - authorization, - archive_request, - method, - backend_requests, - request_bytes, - error_response, - response_bytes, - response_millis, - } - } -} - -impl StatEmitter { - pub fn spawn( - chain_id: u64, - db_conn: DatabaseConnection, - period_seconds: u64, - shutdown_receiver: broadcast::Receiver<()>, - ) -> anyhow::Result { - let (stat_sender, stat_receiver) = flume::unbounded(); - - let mut new = Self { - chain_id, - db_conn, - period_seconds, - }; - - // TODO: send any errors somewhere - let handle = - tokio::spawn(async move { new.stat_loop(stat_receiver, shutdown_receiver).await }); - - Ok((stat_sender, handle).into()) - } - - async fn stat_loop( - &mut self, - stat_receiver: flume::Receiver, - mut shutdown_receiver: broadcast::Receiver<()>, - ) -> anyhow::Result<()> { - let system_now = SystemTime::now(); - - let duration_since_epoch = system_now - .duration_since(SystemTime::UNIX_EPOCH) - .expect("time machines don't exist"); - - // TODO: change period_seconds from u64 to u32 - let current_period = duration_since_epoch - .checked_div(self.period_seconds as u32) - .unwrap() - * self.period_seconds as u32; - - let duration_to_next_period = - Duration::from_secs(self.period_seconds) - (duration_since_epoch - current_period); - - // start the interval when the next period starts - let start_instant = Instant::now() + duration_to_next_period; - let mut interval = interval_at(start_instant, Duration::from_secs(self.period_seconds)); - - // loop between different futures to update these mutables - let mut period_timestamp = current_period.as_secs(); - let mut response_aggregate_map = - HashMap::::new(); - - loop { - tokio::select! { - stat = stat_receiver.recv_async() => { - match stat? { - Web3ProxyStat::Response(stat) => { - let key = stat.key(); - - // TODO: does hashmap have get_or_insert? - if ! response_aggregate_map.contains_key(&key) { - response_aggregate_map.insert(key.clone(), Default::default()); - }; - - if let Some(value) = response_aggregate_map.get_mut(&key) { - if let Err(err) = value.add(stat) { - error!( "unable to aggregate stats! err={:?}", err); - }; - } else { - unimplemented!(); - } - } - } - } - _ = interval.tick() => { - // save all the aggregated stats - // TODO: batch these saves - for (key, aggregate) in response_aggregate_map.drain() { - if let Err(err) = aggregate.save(self.chain_id, &self.db_conn, key, period_timestamp).await { - error!("Unable to save stat while shutting down! {:?}", err); - }; - } - // advance to the next period - // TODO: is this safe? what if there is drift? 
- period_timestamp += self.period_seconds; - } - x = shutdown_receiver.recv() => { - match x { - Ok(_) => { - info!("aggregate stat_loop shutting down"); - // TODO: call aggregate_stat for all the - }, - Err(err) => error!("shutdown receiver. err={:?}", err), - } - break; - } - } - } - - info!("saving {} pending stats", response_aggregate_map.len()); - - for (key, aggregate) in response_aggregate_map.drain() { - if let Err(err) = aggregate - .save(self.chain_id, &self.db_conn, key, period_timestamp) - .await - { - error!("Unable to save stat while shutting down! err={:?}", err); - }; - } - - info!("aggregated stat_loop shut down"); - - Ok(()) - } -} diff --git a/web3_proxy/src/bin/web3_proxy_cli/main.rs b/web3_proxy/src/bin/web3_proxy_cli/main.rs index 99b8c042..7d1d2b5d 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/main.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/main.rs @@ -250,6 +250,9 @@ fn main() -> anyhow::Result<()> { } // set up tokio's async runtime + #[cfg(tokio_uring)] + let mut rt_builder = tokio_uring::Builder::new_multi_thread(); + #[cfg(not(tokio_uring))] let mut rt_builder = runtime::Builder::new_multi_thread(); rt_builder.enable_all(); diff --git a/web3_proxy/src/bin/web3_proxy_cli/proxyd.rs b/web3_proxy/src/bin/web3_proxy_cli/proxyd.rs index 3c16dc59..b0c2138c 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/proxyd.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/proxyd.rs @@ -1,7 +1,7 @@ #![forbid(unsafe_code)] use argh::FromArgs; use futures::StreamExt; -use log::{error, info, warn}; +use log::{error, info, trace, warn}; use num::Zero; use std::path::PathBuf; use std::time::Duration; @@ -9,7 +9,7 @@ use std::{fs, thread}; use tokio::sync::broadcast; use web3_proxy::app::{flatten_handle, flatten_handles, Web3ProxyApp}; use web3_proxy::config::TopConfig; -use web3_proxy::{frontend, metrics_frontend}; +use web3_proxy::{frontend, prometheus}; /// start the main proxy daemon #[derive(FromArgs, PartialEq, Debug, Eq)] @@ -33,7 +33,6 @@ impl ProxydSubCommand { num_workers: usize, ) -> anyhow::Result<()> { let (shutdown_sender, _) = broadcast::channel(1); - // TODO: i think there is a small race. if config_path changes run( @@ -54,7 +53,7 @@ async fn run( frontend_port: u16, prometheus_port: u16, num_workers: usize, - shutdown_sender: broadcast::Sender<()>, + frontend_shutdown_sender: broadcast::Sender<()>, ) -> anyhow::Result<()> { // tokio has code for catching ctrl+c so we use that // this shutdown sender is currently only used in tests, but we might make a /shutdown endpoint or something @@ -62,115 +61,106 @@ async fn run( let app_frontend_port = frontend_port; let app_prometheus_port = prometheus_port; - let mut shutdown_receiver = shutdown_sender.subscribe(); + + // TODO: should we use a watch or broadcast for these? + let (app_shutdown_sender, _app_shutdown_receiver) = broadcast::channel(1); + + let frontend_shutdown_receiver = frontend_shutdown_sender.subscribe(); + let prometheus_shutdown_receiver = app_shutdown_sender.subscribe(); + + // TODO: should we use a watch or broadcast for these? 
+ let (frontend_shutdown_complete_sender, mut frontend_shutdown_complete_receiver) = + broadcast::channel(1); // start the main app - let mut spawned_app = - Web3ProxyApp::spawn(top_config.clone(), num_workers, shutdown_sender.subscribe()).await?; + let mut spawned_app = Web3ProxyApp::spawn(top_config, num_workers, app_shutdown_sender.clone()).await?; // start thread for watching config - if let Some(top_config_path) = top_config_path { - let config_sender = spawned_app.new_top_config_sender; - /* - #[cfg(feature = "inotify")] - { - let mut inotify = Inotify::init().expect("Failed to initialize inotify"); - - inotify - .add_watch(top_config_path.clone(), WatchMask::MODIFY) - .expect("Failed to add inotify watch on config"); - - let mut buffer = [0u8; 4096]; - - // TODO: exit the app if this handle exits - thread::spawn(move || loop { - // TODO: debounce - - let events = inotify - .read_events_blocking(&mut buffer) - .expect("Failed to read inotify events"); - - for event in events { - if event.mask.contains(EventMask::MODIFY) { - info!("config changed"); - match fs::read_to_string(&top_config_path) { - Ok(top_config) => match toml::from_str(&top_config) { - Ok(top_config) => { - config_sender.send(top_config).unwrap(); - } - Err(err) => { - // TODO: panic? - error!("Unable to parse config! {:#?}", err); - } - }, - Err(err) => { - // TODO: panic? - error!("Unable to read config! {:#?}", err); - } - }; - } else { - // TODO: is "MODIFY" enough, or do we want CLOSE_WRITE? - unimplemented!(); - } - } - }); - } - */ - // #[cfg(not(feature = "inotify"))] - { - thread::spawn(move || loop { - match fs::read_to_string(&top_config_path) { - Ok(new_top_config) => match toml::from_str(&new_top_config) { - Ok(new_top_config) => { - if new_top_config != top_config { - top_config = new_top_config; - config_sender.send(top_config.clone()).unwrap(); - } - } - Err(err) => { - // TODO: panic? - error!("Unable to parse config! {:#?}", err); - } - }, - Err(err) => { - // TODO: panic? - error!("Unable to read config! {:#?}", err); - } - } - - thread::sleep(Duration::from_secs(10)); - }); - } - } + // if let Some(top_config_path) = top_config_path { + // let config_sender = spawned_app.new_top_config_sender; + // { + // thread::spawn(move || loop { + // match fs::read_to_string(&top_config_path) { + // Ok(new_top_config) => match toml::from_str(&new_top_config) { + // Ok(new_top_config) => { + // if new_top_config != top_config { + // top_config = new_top_config; + // config_sender.send(top_config.clone()).unwrap(); + // } + // } + // Err(err) => { + // // TODO: panic? + // error!("Unable to parse config! {:#?}", err); + // } + // }, + // Err(err) => { + // // TODO: panic? + // error!("Unable to read config! {:#?}", err); + // } + // } + // + // thread::sleep(Duration::from_secs(10)); + // }); + // } + // } // start the prometheus metrics port - let prometheus_handle = tokio::spawn(metrics_frontend::serve( + let prometheus_handle = tokio::spawn(prometheus::serve( spawned_app.app.clone(), app_prometheus_port, + prometheus_shutdown_receiver, )); // wait until the app has seen its first consensus head block - // TODO: if backups were included, wait a little longer? 
- let _ = spawned_app.app.head_block_receiver().changed().await; + // if backups were included, wait a little longer + for _ in 0..3 { + let _ = spawned_app.consensus_connections_watcher.changed().await; + + let consensus = spawned_app + .consensus_connections_watcher + .borrow_and_update(); + + if *consensus.context("Channel closed!")?.backups_needed { + info!( + "waiting longer. found consensus with backups: {}", + *consensus.context("Channel closed!")?.head_block.as_ref().unwrap(), + ); + } else { + // TODO: also check that we have at least one archive node connected? + break; + } + } // start the frontend port - let frontend_handle = tokio::spawn(frontend::serve(app_frontend_port, spawned_app.app.clone())); + let frontend_handle = tokio::spawn(frontend::serve( + app_frontend_port, + spawned_app.app.clone(), + frontend_shutdown_receiver, + frontend_shutdown_complete_sender, + )); + + let frontend_handle = flatten_handle(frontend_handle); // if everything is working, these should all run forever + let mut exited_with_err = false; + let mut frontend_exited = false; tokio::select! { x = flatten_handles(spawned_app.app_handles) => { match x { Ok(_) => info!("app_handle exited"), Err(e) => { - return Err(e); + error!("app_handle exited: {:#?}", e); + exited_with_err = true; } } } - x = flatten_handle(frontend_handle) => { + x = frontend_handle => { + frontend_exited = true; match x { Ok(_) => info!("frontend exited"), Err(e) => { - return Err(e); + error!("frontend exited: {:#?}", e); + exited_with_err = true; } } } @@ -178,35 +168,62 @@ async fn run( match x { Ok(_) => info!("prometheus exited"), Err(e) => { - return Err(e); + error!("prometheus exited: {:#?}", e); + exited_with_err = true; } } } x = tokio::signal::ctrl_c() => { + // TODO: unix terminate signal, too match x { Ok(_) => info!("quiting from ctrl-c"), Err(e) => { - return Err(e.into()); + // TODO: i don't think this is possible + error!("error quiting from ctrl-c: {:#?}", e); + exited_with_err = true; } } } - x = shutdown_receiver.recv() => { + // TODO: how can we properly watch background handles here? this returns None immediatly and the app exits. i think the bug is somewhere else though + x = spawned_app.background_handles.next() => { match x { - Ok(_) => info!("quiting from shutdown receiver"), - Err(e) => { - return Err(e.into()); + Some(Ok(_)) => info!("quiting from background handles"), + Some(Err(e)) => { + error!("quiting from background handle error: {:#?}", e); + exited_with_err = true; + } + None => { + // TODO: is this an error? + warn!("background handles exited"); } } } }; - // one of the handles stopped. 
send a value so the others know to shut down - if let Err(err) = shutdown_sender.send(()) { - warn!("shutdown sender err={:?}", err); + // if a future above completed, make sure the frontend knows to start turning off + if !frontend_exited { + if let Err(err) = frontend_shutdown_sender.send(()) { + // TODO: this is actually expected if the frontend is already shut down + warn!("shutdown sender err={:?}", err); + }; + } + + // TODO: wait until the frontend completes + if let Err(err) = frontend_shutdown_complete_receiver.recv().await { + warn!("shutdown completition err={:?}", err); + } else { + info!("frontend exited gracefully"); + } + + // now that the frontend is complete, tell all the other futures to finish + if let Err(err) = app_shutdown_sender.send(()) { + warn!("backend sender err={:?}", err); }; - // wait for things like saving stats to the database to complete - info!("waiting on important background tasks"); + info!( + "waiting on {} important background tasks", + spawned_app.background_handles.len() + ); let mut background_errors = 0; while let Some(x) = spawned_app.background_handles.next().await { match x { @@ -218,15 +235,19 @@ async fn run( error!("{:?}", e); background_errors += 1; } - Ok(Ok(_)) => continue, + Ok(Ok(_)) => { + // TODO: how can we know which handle exited? + trace!("a background handle exited"); + continue; + } } } - if background_errors.is_zero() { + if background_errors.is_zero() && !exited_with_err { info!("finished"); Ok(()) } else { - // TODO: collect instead? + // TODO: collect all the errors here instead? Err(anyhow::anyhow!("finished with errors!")) } } @@ -319,15 +340,14 @@ mod tests { extra: Default::default(), }; - let (shutdown_sender, _) = broadcast::channel(1); + let (shutdown_sender, _shutdown_receiver) = broadcast::channel(1); // spawn another thread for running the app // TODO: allow launching into the local tokio runtime instead of creating a new one? let handle = { - let shutdown_sender = shutdown_sender.clone(); - let frontend_port = 0; let prometheus_port = 0; + let shutdown_sender = shutdown_sender.clone(); tokio::spawn(async move { run( diff --git a/web3_proxy/src/bin/web3_proxy_cli/user_export.rs b/web3_proxy/src/bin/web3_proxy_cli/user_export.rs index aea08485..c75e9311 100644 --- a/web3_proxy/src/bin/web3_proxy_cli/user_export.rs +++ b/web3_proxy/src/bin/web3_proxy_cli/user_export.rs @@ -4,7 +4,6 @@ use log::info; use migration::sea_orm::{DatabaseConnection, EntityTrait, PaginatorTrait}; use std::fs::{self, create_dir_all}; use std::path::Path; -use std::time::{SystemTime, UNIX_EPOCH}; #[derive(FromArgs, PartialEq, Eq, Debug)] /// Export users from the database. @@ -21,7 +20,7 @@ impl UserExportSubCommand { // create the output dir if it does not exist create_dir_all(&self.output_dir)?; - let now = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs(); + let now = chrono::Utc::now().timestamp(); let export_dir = Path::new(&self.output_dir); diff --git a/web3_proxy/src/config.rs b/web3_proxy/src/config.rs index 5501091c..f9010902 100644 --- a/web3_proxy/src/config.rs +++ b/web3_proxy/src/config.rs @@ -145,7 +145,7 @@ pub struct AppConfig { /// None = allow all requests pub public_requests_per_period: Option, - /// Salt for hashing recent ips + /// Salt for hashing recent ips. 
Not a perfect way to introduce privacy, but better than nothing pub public_recent_ips_salt: Option, /// RPC responses are cached locally @@ -169,6 +169,15 @@ pub struct AppConfig { /// If none, the minimum * 2 is used pub volatile_redis_max_connections: Option, + /// influxdb host for stats + pub influxdb_host: Option, + + /// influxdb org for stats + pub influxdb_org: Option, + + /// influxdb token for stats + pub influxdb_token: Option, + /// unknown config options get put here #[serde(flatten, default = "HashMap::default")] pub extra: HashMap, diff --git a/web3_proxy/src/frontend/authorization.rs b/web3_proxy/src/frontend/authorization.rs index b0890878..4ab6d66f 100644 --- a/web3_proxy/src/frontend/authorization.rs +++ b/web3_proxy/src/frontend/authorization.rs @@ -10,6 +10,7 @@ use axum::headers::authorization::Bearer; use axum::headers::{Header, Origin, Referer, UserAgent}; use chrono::Utc; use deferred_rate_limiter::DeferredRateLimitResult; +use entities::sea_orm_active_enums::TrackingLevel; use entities::{login, rpc_key, user, user_tier}; use ethers::types::Bytes; use ethers::utils::keccak256; @@ -72,10 +73,7 @@ pub struct Authorization { #[derive(Debug)] pub struct RequestMetadata { - pub start_datetime: chrono::DateTime, pub start_instant: tokio::time::Instant, - // TODO: better name for this - pub period_seconds: u64, pub request_bytes: u64, // TODO: do we need atomics? seems like we should be able to pass a &mut around // TODO: "archive" isn't really a boolean. @@ -90,14 +88,12 @@ pub struct RequestMetadata { } impl RequestMetadata { - pub fn new(period_seconds: u64, request_bytes: usize) -> anyhow::Result { + pub fn new(request_bytes: usize) -> anyhow::Result { // TODO: how can we do this without turning it into a string first. this is going to slow us down! let request_bytes = request_bytes as u64; let new = Self { start_instant: Instant::now(), - start_datetime: Utc::now(), - period_seconds, request_bytes, archive_request: false.into(), backend_requests: Default::default(), @@ -183,6 +179,7 @@ impl Authorization { let authorization_checks = AuthorizationChecks { // any error logs on a local (internal) query are likely problems. log them all log_revert_chance: 1.0, + tracking_level: TrackingLevel::Detailed, // default for everything else should be fine. we don't have a user_id or ip to give ..Default::default() }; @@ -220,10 +217,10 @@ impl Authorization { }) .unwrap_or_default(); - // TODO: default or None? let authorization_checks = AuthorizationChecks { max_requests_per_period, proxy_mode, + tracking_level: TrackingLevel::Detailed, ..Default::default() }; @@ -616,7 +613,7 @@ impl Web3ProxyApp { proxy_mode: ProxyMode, ) -> anyhow::Result { // ip rate limits don't check referer or user agent - // the do check + // the do check origin because we can override rate limits for some origins let authorization = Authorization::external( allowed_origin_requests_per_period, self.db_conn.clone(), @@ -766,7 +763,7 @@ impl Web3ProxyApp { allowed_origins, allowed_referers, allowed_user_agents, - log_level: rpc_key_model.log_level, + tracking_level: rpc_key_model.log_level, log_revert_chance: rpc_key_model.log_revert_chance, max_concurrent_requests: user_tier_model.max_concurrent_requests, max_requests_per_period: user_tier_model.max_requests_per_period, diff --git a/web3_proxy/src/frontend/mod.rs b/web3_proxy/src/frontend/mod.rs index bfa7256d..549ef287 100644 --- a/web3_proxy/src/frontend/mod.rs +++ b/web3_proxy/src/frontend/mod.rs @@ -1,4 +1,4 @@ -//! 
`frontend` contains HTTP and websocket endpoints for use by users and admins. +//! `frontend` contains HTTP and websocket endpoints for use by a website or web3 wallet. //! //! Important reading about axum extractors: https://docs.rs/axum/latest/axum/extract/index.html#the-order-of-extractors @@ -22,28 +22,34 @@ use moka::future::Cache; use std::net::SocketAddr; use std::sync::Arc; use std::{iter::once, time::Duration}; +use tokio::sync::broadcast; use tower_http::cors::CorsLayer; use tower_http::sensitive_headers::SetSensitiveRequestHeadersLayer; +/// simple keys for caching responses #[derive(Clone, Hash, PartialEq, Eq)] pub enum FrontendResponseCaches { Status, } -// TODO: what should this cache's value be? -pub type FrontendResponseCache = +pub type FrontendJsonResponseCache = Cache, hashbrown::hash_map::DefaultHashBuilder>; pub type FrontendHealthCache = Cache<(), bool, hashbrown::hash_map::DefaultHashBuilder>; /// Start the frontend server. -pub async fn serve(port: u16, proxy_app: Arc) -> anyhow::Result<()> { +pub async fn serve( + port: u16, + proxy_app: Arc, + mut shutdown_receiver: broadcast::Receiver<()>, + shutdown_complete_sender: broadcast::Sender<()>, +) -> anyhow::Result<()> { // setup caches for whatever the frontend needs - // TODO: a moka cache is probably way overkill for this. - // no need for max items. only expire because of time to live - let response_cache: FrontendResponseCache = Cache::builder() + // no need for max items since it is limited by the enum key + let json_response_cache: FrontendJsonResponseCache = Cache::builder() .time_to_live(Duration::from_secs(2)) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); + // /health gets a cache with a shorter lifetime let health_cache: FrontendHealthCache = Cache::builder() .time_to_live(Duration::from_millis(100)) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); @@ -208,7 +214,7 @@ pub async fn serve(port: u16, proxy_app: Arc) -> anyhow::Result<() // application state .layer(Extension(proxy_app.clone())) // frontend caches - .layer(Extension(response_cache)) + .layer(Extension(json_response_cache)) .layer(Extension(health_cache)) // 404 for any unknown routes .fallback(errors::handler_404); @@ -229,9 +235,16 @@ pub async fn serve(port: u16, proxy_app: Arc) -> anyhow::Result<() let service = app.into_make_service_with_connect_info::(); // `axum::Server` is a re-export of `hyper::Server` - axum::Server::bind(&addr) + let server = axum::Server::bind(&addr) // TODO: option to use with_connect_info. we want it in dev, but not when running behind a proxy, but not .serve(service) + .with_graceful_shutdown(async move { + let _ = shutdown_receiver.recv().await; + }) .await - .map_err(Into::into) + .map_err(Into::into); + + let _ = shutdown_complete_sender.send(()); + + server } diff --git a/web3_proxy/src/frontend/rpc_proxy_ws.rs b/web3_proxy/src/frontend/rpc_proxy_ws.rs index 2676f9e5..072ad854 100644 --- a/web3_proxy/src/frontend/rpc_proxy_ws.rs +++ b/web3_proxy/src/frontend/rpc_proxy_ws.rs @@ -4,8 +4,7 @@ use super::authorization::{ip_is_authorized, key_is_authorized, Authorization, RequestMetadata}; use super::errors::{FrontendErrorResponse, FrontendResult}; -use crate::app::REQUEST_PERIOD; -use crate::app_stats::ProxyResponseStat; +use crate::stats::RpcQueryStats; use crate::{ app::Web3ProxyApp, jsonrpc::{JsonRpcForwardedResponse, JsonRpcForwardedResponseEnum, JsonRpcRequest}, @@ -379,8 +378,7 @@ async fn handle_socket_payload( // TODO: move this logic into the app? 
let request_bytes = json_request.num_bytes(); - let request_metadata = - Arc::new(RequestMetadata::new(REQUEST_PERIOD, request_bytes).unwrap()); + let request_metadata = Arc::new(RequestMetadata::new(request_bytes).unwrap()); let subscription_id = json_request.params.unwrap().to_string(); @@ -401,7 +399,7 @@ async fn handle_socket_payload( JsonRpcForwardedResponse::from_value(json!(partial_response), id.clone()); if let Some(stat_sender) = app.stat_sender.as_ref() { - let response_stat = ProxyResponseStat::new( + let response_stat = RpcQueryStats::new( json_request.method.clone(), authorization.clone(), request_metadata, diff --git a/web3_proxy/src/frontend/status.rs b/web3_proxy/src/frontend/status.rs index 1199dc25..58b0a7a4 100644 --- a/web3_proxy/src/frontend/status.rs +++ b/web3_proxy/src/frontend/status.rs @@ -3,7 +3,7 @@ //! For ease of development, users can currently access these endponts. //! They will eventually move to another port. -use super::{FrontendHealthCache, FrontendResponseCache, FrontendResponseCaches}; +use super::{FrontendHealthCache, FrontendJsonResponseCache, FrontendResponseCaches}; use crate::app::{Web3ProxyApp, APP_USER_AGENT}; use axum::{http::StatusCode, response::IntoResponse, Extension, Json}; use axum_macros::debug_handler; @@ -33,7 +33,7 @@ pub async fn health( #[debug_handler] pub async fn status( Extension(app): Extension>, - Extension(response_cache): Extension, + Extension(response_cache): Extension, ) -> impl IntoResponse { let body = response_cache .get_with(FrontendResponseCaches::Status, async { diff --git a/web3_proxy/src/frontend/users.rs b/web3_proxy/src/frontend/users.rs index 67b33f93..fe765c6a 100644 --- a/web3_proxy/src/frontend/users.rs +++ b/web3_proxy/src/frontend/users.rs @@ -2,10 +2,11 @@ use super::authorization::{login_is_authorized, RpcSecretKey}; use super::errors::FrontendResult; use crate::app::Web3ProxyApp; -use crate::user_queries::get_page_from_params; -use crate::user_queries::{ - get_chain_id_from_params, get_query_start_from_params, query_user_stats, StatResponse, +use crate::http_params::{ + get_chain_id_from_params, get_page_from_params, get_query_start_from_params, }; +use crate::stats::db_queries::query_user_stats; +use crate::stats::StatType; use crate::user_token::UserBearerToken; use crate::{PostLogin, PostLoginQuery}; use anyhow::Context; @@ -19,7 +20,7 @@ use axum::{ use axum_client_ip::InsecureClientIp; use axum_macros::debug_handler; use chrono::{TimeZone, Utc}; -use entities::sea_orm_active_enums::LogLevel; +use entities::sea_orm_active_enums::TrackingLevel; use entities::{login, pending_login, revert_log, rpc_key, user}; use ethers::{prelude::Address, types::Bytes}; use hashbrown::HashMap; @@ -489,9 +490,7 @@ pub async fn user_balance_get( /// /// We will subscribe to events to watch for any user deposits, but sometimes events can be missed. /// -/// TODO: rate limit by user -/// TODO: one key per request? maybe /user/balance/:rpc_key? -/// TODO: this will change as we add better support for secondary users. +/// TODO: change this. just have a /tx/:txhash that is open to anyone. rate limit like we rate limit /login #[debug_handler] pub async fn user_balance_post( Extension(app): Extension>, @@ -503,8 +502,6 @@ pub async fn user_balance_post( } /// `GET /user/keys` -- Use a bearer token to get the user's api keys and their settings. -/// -/// TODO: one key per request? maybe /user/keys/:rpc_key? 
#[debug_handler] pub async fn rpc_keys_get( Extension(app): Extension>, @@ -514,7 +511,7 @@ pub async fn rpc_keys_get( let db_replica = app .db_replica() - .context("getting db to fetch user's keys")?; + .context("db_replica is required to fetch a user's keys")?; let uks = rpc_key::Entity::find() .filter(rpc_key::Column::UserId.eq(user.id)) @@ -522,7 +519,6 @@ pub async fn rpc_keys_get( .await .context("failed loading user's key")?; - // TODO: stricter type on this? let response_json = json!({ "user_id": user.id, "user_rpc_keys": uks @@ -560,7 +556,7 @@ pub struct UserKeyManagement { allowed_referers: Option, allowed_user_agents: Option, description: Option, - log_level: Option, + log_level: Option, // TODO: enable log_revert_trace: Option, private_txs: Option, } @@ -813,7 +809,7 @@ pub async fn user_stats_aggregated_get( bearer: Option>>, Query(params): Query>, ) -> FrontendResult { - let response = query_user_stats(&app, bearer, ¶ms, StatResponse::Aggregated).await?; + let response = query_user_stats(&app, bearer, ¶ms, StatType::Aggregated).await?; Ok(response) } @@ -833,7 +829,7 @@ pub async fn user_stats_detailed_get( bearer: Option>>, Query(params): Query>, ) -> FrontendResult { - let response = query_user_stats(&app, bearer, ¶ms, StatResponse::Detailed).await?; + let response = query_user_stats(&app, bearer, ¶ms, StatType::Detailed).await?; Ok(response) } diff --git a/web3_proxy/src/http_params.rs b/web3_proxy/src/http_params.rs new file mode 100644 index 00000000..b462274a --- /dev/null +++ b/web3_proxy/src/http_params.rs @@ -0,0 +1,206 @@ +use crate::app::DatabaseReplica; +use crate::frontend::errors::FrontendErrorResponse; +use crate::{app::Web3ProxyApp, user_token::UserBearerToken}; +use anyhow::Context; +use axum::{ + headers::{authorization::Bearer, Authorization}, + TypedHeader, +}; +use chrono::{NaiveDateTime, Utc}; +use entities::login; +use hashbrown::HashMap; +use log::{debug, trace, warn}; +use migration::sea_orm::{ColumnTrait, DatabaseConnection, EntityTrait, QueryFilter}; +use redis_rate_limiter::{redis::AsyncCommands, RedisConnection}; + +/// get the attached address for the given bearer token. +/// First checks redis. Then checks the database. +/// 0 means all users. +/// This authenticates that the bearer is allowed to view this user_id's stats +pub async fn get_user_id_from_params( + redis_conn: &mut RedisConnection, + db_conn: &DatabaseConnection, + db_replica: &DatabaseReplica, + // this is a long type. should we strip it down? + bearer: Option>>, + params: &HashMap, +) -> Result { + match (bearer, params.get("user_id")) { + (Some(TypedHeader(Authorization(bearer))), Some(user_id)) => { + // check for the bearer cache key + let user_bearer_token = UserBearerToken::try_from(bearer)?; + + let user_redis_key = user_bearer_token.redis_key(); + + let mut save_to_redis = false; + + // get the user id that is attached to this bearer token + let bearer_user_id = match redis_conn.get::<_, u64>(&user_redis_key).await { + Err(_) => { + // TODO: inspect the redis error? if redis is down we should warn + // this also means redis being down will not kill our app. Everything will need a db read query though. + + let user_login = login::Entity::find() + .filter(login::Column::BearerToken.eq(user_bearer_token.uuid())) + .one(db_replica.conn()) + .await + .context("database error while querying for user")? + .ok_or(FrontendErrorResponse::AccessDenied)?; + + // if expired, delete ALL expired logins + let now = Utc::now(); + if now > user_login.expires_at { + // this row is expired! 
do not allow auth! + // delete ALL expired logins. + let delete_result = login::Entity::delete_many() + .filter(login::Column::ExpiresAt.lte(now)) + .exec(db_conn) + .await?; + + // TODO: emit a stat? if this is high something weird might be happening + debug!("cleared expired logins: {:?}", delete_result); + + return Err(FrontendErrorResponse::AccessDenied); + } + + save_to_redis = true; + + user_login.user_id + } + Ok(x) => { + // TODO: push cache ttl further in the future? + x + } + }; + + let user_id: u64 = user_id.parse().context("Parsing user_id param")?; + + if bearer_user_id != user_id { + return Err(FrontendErrorResponse::AccessDenied); + } + + if save_to_redis { + // TODO: how long? we store in database for 4 weeks + const ONE_DAY: usize = 60 * 60 * 24; + + if let Err(err) = redis_conn + .set_ex::<_, _, ()>(user_redis_key, user_id, ONE_DAY) + .await + { + warn!("Unable to save user bearer token to redis: {}", err) + } + } + + Ok(bearer_user_id) + } + (_, None) => { + // they have a bearer token. we don't care about it on public pages + // 0 means all + Ok(0) + } + (None, Some(_)) => { + // they do not have a bearer token, but requested a specific id. block + // TODO: proper error code from a useful error code + // TODO: maybe instead of this sharp edged warn, we have a config value? + // TODO: check config for if we should deny or allow this + Err(FrontendErrorResponse::AccessDenied) + // // TODO: make this a flag + // warn!("allowing without auth during development!"); + // Ok(x.parse()?) + } + } +} + +/// only allow rpc_key to be set if user_id is also set. +/// this will keep people from reading someone else's keys. +/// 0 means none. + +pub fn get_rpc_key_id_from_params( + user_id: u64, + params: &HashMap, +) -> anyhow::Result { + if user_id > 0 { + params.get("rpc_key_id").map_or_else( + || Ok(0), + |c| { + let c = c.parse()?; + + Ok(c) + }, + ) + } else { + Ok(0) + } +} + +pub fn get_chain_id_from_params( + app: &Web3ProxyApp, + params: &HashMap, +) -> anyhow::Result { + params.get("chain_id").map_or_else( + || Ok(app.config.chain_id), + |c| { + let c = c.parse()?; + + Ok(c) + }, + ) +} + +pub fn get_page_from_params(params: &HashMap) -> anyhow::Result { + params.get("page").map_or_else::, _, _>( + || { + // no page in params. set default + Ok(0) + }, + |x: &String| { + // parse the given timestamp + // TODO: error code 401 + let x = x.parse().context("parsing page query from params")?; + + Ok(x) + }, + ) +} + +// TODO: return chrono::Utc instead? +pub fn get_query_start_from_params( + params: &HashMap, +) -> anyhow::Result { + params.get("query_start").map_or_else( + || { + // no timestamp in params. set default + let x = chrono::Utc::now() - chrono::Duration::days(30); + + Ok(x.naive_utc()) + }, + |x: &String| { + // parse the given timestamp + let x = x.parse::().context("parsing timestamp query param")?; + + // TODO: error code 401 + let x = + NaiveDateTime::from_timestamp_opt(x, 0).context("parsing timestamp query param")?; + + Ok(x) + }, + ) +} + +pub fn get_query_window_seconds_from_params( + params: &HashMap, +) -> Result { + params.get("query_window_seconds").map_or_else( + || { + // no page in params. 
set default + Ok(0) + }, + |query_window_seconds: &String| { + // parse the given timestamp + query_window_seconds.parse::().map_err(|err| { + trace!("Unable to parse rpc_key_id: {:#?}", err); + FrontendErrorResponse::BadRequest("Unable to parse rpc_key_id".to_string()) + }) + }, + ) +} diff --git a/web3_proxy/src/jsonrpc.rs b/web3_proxy/src/jsonrpc.rs index 7a601c20..0a6435c6 100644 --- a/web3_proxy/src/jsonrpc.rs +++ b/web3_proxy/src/jsonrpc.rs @@ -30,7 +30,8 @@ impl fmt::Debug for JsonRpcRequest { f.debug_struct("JsonRpcRequest") .field("id", &self.id) .field("method", &self.method) - .finish_non_exhaustive() + .field("params", &self.params) + .finish() } } diff --git a/web3_proxy/src/lib.rs b/web3_proxy/src/lib.rs index e31d0972..83fa397b 100644 --- a/web3_proxy/src/lib.rs +++ b/web3_proxy/src/lib.rs @@ -1,15 +1,15 @@ pub mod app; -pub mod app_stats; pub mod admin_queries; pub mod atomics; pub mod block_number; pub mod config; pub mod frontend; +pub mod http_params; pub mod jsonrpc; -pub mod metrics_frontend; pub mod pagerduty; +pub mod prometheus; pub mod rpcs; -pub mod user_queries; +pub mod stats; pub mod user_token; use serde::Deserialize; diff --git a/web3_proxy/src/metered/jsonrpc_error_count.rs b/web3_proxy/src/metered/jsonrpc_error_count.rs deleted file mode 100644 index eb8ed33f..00000000 --- a/web3_proxy/src/metered/jsonrpc_error_count.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! A module providing the `JsonRpcErrorCount` metric. - -use ethers::providers::ProviderError; -use serde::Serialize; -use std::ops::Deref; - -/// A metric counting how many times an expression typed std `Result` as -/// returned an `Err` variant. -/// -/// This is a light-weight metric. -/// -/// By default, `ErrorCount` uses a lock-free `u64` `Counter`, which makes sense -/// in multithread scenarios. Non-threaded applications can gain performance by -/// using a `std::cell:Cell` instead. -#[derive(Clone, Default, Debug, Serialize)] -pub struct JsonRpcErrorCount>(pub C); - -impl Metric> for JsonRpcErrorCount {} - -impl Enter for JsonRpcErrorCount { - type E = (); - fn enter(&self) {} -} - -impl OnResult> for JsonRpcErrorCount { - /// Unlike the default ErrorCount, this one does not increment for internal jsonrpc errors - /// TODO: count errors like this on another helper - fn on_result(&self, _: (), r: &Result) -> Advice { - match r { - Ok(_) => {} - Err(ProviderError::JsonRpcClientError(_)) => { - self.0.incr(); - } - Err(_) => { - // TODO: count jsonrpc errors - } - } - Advice::Return - } -} - -impl Clear for JsonRpcErrorCount { - fn clear(&self) { - self.0.clear() - } -} - -impl Deref for JsonRpcErrorCount { - type Target = C; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} diff --git a/web3_proxy/src/metered/mod.rs b/web3_proxy/src/metered/mod.rs deleted file mode 100644 index f8f61bbc..00000000 --- a/web3_proxy/src/metered/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -mod jsonrpc_error_count; -mod provider_error_count; - -pub use self::jsonrpc_error_count::JsonRpcErrorCount; -pub use self::provider_error_count::ProviderErrorCount; diff --git a/web3_proxy/src/metered/provider_error_count.rs b/web3_proxy/src/metered/provider_error_count.rs deleted file mode 100644 index 9025c463..00000000 --- a/web3_proxy/src/metered/provider_error_count.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! A module providing the `JsonRpcErrorCount` metric. 
- -use ethers::providers::ProviderError; -use serde::Serialize; -use std::ops::Deref; - -/// A metric counting how many times an expression typed std `Result` as -/// returned an `Err` variant. -/// -/// This is a light-weight metric. -/// -/// By default, `ErrorCount` uses a lock-free `u64` `Counter`, which makes sense -/// in multithread scenarios. Non-threaded applications can gain performance by -/// using a `std::cell:Cell` instead. -#[derive(Clone, Default, Debug, Serialize)] -pub struct ProviderErrorCount>(pub C); - -impl Metric> for ProviderErrorCount {} - -impl Enter for ProviderErrorCount { - type E = (); - fn enter(&self) {} -} - -impl OnResult> for ProviderErrorCount { - /// Unlike the default ErrorCount, this one does not increment for internal jsonrpc errors - fn on_result(&self, _: (), r: &Result) -> Advice { - match r { - Ok(_) => {} - Err(ProviderError::JsonRpcClientError(_)) => {} - Err(_) => { - self.0.incr(); - } - } - Advice::Return - } -} - -impl Clear for ProviderErrorCount { - fn clear(&self) { - self.0.clear() - } -} - -impl Deref for ProviderErrorCount { - type Target = C; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} diff --git a/web3_proxy/src/pagerduty.rs b/web3_proxy/src/pagerduty.rs index 8716df90..8ee1e55f 100644 --- a/web3_proxy/src/pagerduty.rs +++ b/web3_proxy/src/pagerduty.rs @@ -1,6 +1,6 @@ use crate::config::TopConfig; use gethostname::gethostname; -use log::{debug, error}; +use log::{debug, error, warn}; use pagerduty_rs::eventsv2sync::EventsV2 as PagerdutySyncEventsV2; use pagerduty_rs::types::{AlertTrigger, AlertTriggerPayload, Event}; use serde::Serialize; @@ -157,8 +157,12 @@ pub fn pagerduty_alert( let group = chain_id.map(|x| format!("chain #{}", x)); - let source = - source.unwrap_or_else(|| gethostname().into_string().unwrap_or("unknown".to_string())); + let source = source.unwrap_or_else(|| { + gethostname().into_string().unwrap_or_else(|err| { + warn!("unable to handle hostname: {:#?}", err); + "unknown".to_string() + }) + }); let mut s = DefaultHasher::new(); // TODO: include severity here? diff --git a/web3_proxy/src/metrics_frontend.rs b/web3_proxy/src/prometheus.rs similarity index 50% rename from web3_proxy/src/metrics_frontend.rs rename to web3_proxy/src/prometheus.rs index cc2da646..2c582c24 100644 --- a/web3_proxy/src/metrics_frontend.rs +++ b/web3_proxy/src/prometheus.rs @@ -5,40 +5,31 @@ use axum::{routing::get, Extension, Router}; use log::info; use std::net::SocketAddr; use std::sync::Arc; +use tokio::sync::broadcast; use crate::app::Web3ProxyApp; /// Run a prometheus metrics server on the given port. - -pub async fn serve(app: Arc, port: u16) -> anyhow::Result<()> { - // build our application with a route - // order most to least common - // TODO: 404 any unhandled routes? +pub async fn serve( + app: Arc, + port: u16, + mut shutdown_receiver: broadcast::Receiver<()>, +) -> anyhow::Result<()> { + // routes should be ordered most to least common let app = Router::new().route("/", get(root)).layer(Extension(app)); - // run our app with hyper - // TODO: allow only listening on localhost? + // TODO: config for the host? let addr = SocketAddr::from(([0, 0, 0, 0], port)); info!("prometheus listening on port {}", port); - // TODO: into_make_service is enough if we always run behind a proxy. make into_make_service_with_connect_info optional? 
- /* - InsecureClientIp sequentially looks for an IP in: - - x-forwarded-for header (de-facto standard) - - x-real-ip header - - forwarded header (new standard) - - axum::extract::ConnectInfo (if not behind proxy) - - Since we run behind haproxy, x-forwarded-for will be set. - We probably won't need into_make_service_with_connect_info, but it shouldn't hurt. - */ - let service = app.into_make_service_with_connect_info::(); - // let service = app.into_make_service(); + let service = app.into_make_service(); // `axum::Server` is a re-export of `hyper::Server` axum::Server::bind(&addr) - // TODO: option to use with_connect_info. we want it in dev, but not when running behind a proxy, but not .serve(service) + .with_graceful_shutdown(async move { + let _ = shutdown_receiver.recv().await; + }) .await .map_err(Into::into) } diff --git a/web3_proxy/src/rpcs/blockchain.rs b/web3_proxy/src/rpcs/blockchain.rs index 3f1c79a9..04d0d401 100644 --- a/web3_proxy/src/rpcs/blockchain.rs +++ b/web3_proxy/src/rpcs/blockchain.rs @@ -1,6 +1,6 @@ +///! Keep track of the blockchain as seen by a Web3Rpcs. use super::consensus::ConsensusFinder; use super::many::Web3Rpcs; -///! Keep track of the blockchain as seen by a Web3Rpcs. use super::one::Web3Rpc; use super::transactions::TxStatus; use crate::frontend::authorization::Authorization; @@ -10,9 +10,9 @@ use derive_more::From; use ethers::prelude::{Block, TxHash, H256, U64}; use log::{debug, trace, warn, Level}; use moka::future::Cache; +use serde::ser::SerializeStruct; use serde::Serialize; use serde_json::json; -use std::time::{SystemTime, UNIX_EPOCH}; use std::{cmp::Ordering, fmt::Display, sync::Arc}; use tokio::sync::broadcast; use tokio::time::Duration; @@ -23,7 +23,7 @@ pub type ArcBlock = Arc>; pub type BlocksByHashCache = Cache; /// A block and its age. 
-#[derive(Clone, Debug, Default, From, Serialize)] +#[derive(Clone, Debug, Default, From)] pub struct Web3ProxyBlock { pub block: ArcBlock, /// number of seconds this block was behind the current time when received @@ -31,6 +31,29 @@ pub struct Web3ProxyBlock { pub received_age: Option, } +impl Serialize for Web3ProxyBlock { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + // TODO: i'm not sure about this name + let mut state = serializer.serialize_struct("saved_block", 2)?; + + state.serialize_field("age", &self.age())?; + + let block = json!({ + "block_hash": self.block.hash, + "parent_hash": self.block.parent_hash, + "number": self.block.number, + "timestamp": self.block.timestamp, + }); + + state.serialize_field("block", &block)?; + + state.end() + } +} + impl PartialEq for Web3ProxyBlock { fn eq(&self, other: &Self) -> bool { match (self.block.hash, other.block.hash) { @@ -63,16 +86,16 @@ impl Web3ProxyBlock { } pub fn age(&self) -> u64 { - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("there should always be time"); + let now = chrono::Utc::now().timestamp(); - let block_timestamp = Duration::from_secs(self.block.timestamp.as_u64()); + let block_timestamp = self.block.timestamp.as_u32() as i64; if block_timestamp < now { // this server is still syncing from too far away to serve requests // u64 is safe because ew checked equality above - (now - block_timestamp).as_secs() + // (now - block_timestamp).as_secs() + // u64 is safe because we checked equality above + (now - block_timestamp) as u64 } else { 0 } @@ -387,7 +410,7 @@ impl Web3Rpcs { return Ok(()); } - let new_synced_connections = consensus_finder + let new_consensus = consensus_finder .best_consensus_connections(authorization, self) .await .context("no consensus head block!") @@ -397,14 +420,14 @@ impl Web3Rpcs { err })?; - // TODO: what should we do if the block number of new_synced_connections is < old_synced_connections? wait? + // TODO: what should we do if the block number of new_consensus is < old_synced_connections? wait? 
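// A minimal sketch of the chrono-based age calculation used by `age()` above;
// the saturating form below is equivalent to the `if block_timestamp < now`
// branch and avoids any chance of a u64 underflow when a block reports a
// timestamp slightly in the future.
fn block_age_secs(block_timestamp: i64) -> u64 {
    let now = chrono::Utc::now().timestamp();

    // clamp at zero instead of wrapping if the block claims a future timestamp
    (now - block_timestamp).max(0) as u64
}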
let watch_consensus_head_sender = self.watch_consensus_head_sender.as_ref().unwrap(); - let consensus_tier = new_synced_connections.tier; + let consensus_tier = new_consensus.tier; let total_tiers = consensus_finder.len(); - let backups_needed = new_synced_connections.backups_needed; - let consensus_head_block = new_synced_connections.head_block.clone(); - let num_consensus_rpcs = new_synced_connections.num_conns(); + let backups_needed = new_consensus.backups_needed; + let consensus_head_block = new_consensus.head_block.clone(); + let num_consensus_rpcs = new_consensus.num_conns(); let mut num_synced_rpcs = 0; let num_active_rpcs = consensus_finder .all_rpcs_group() @@ -421,7 +444,7 @@ impl Web3Rpcs { let old_consensus_head_connections = self .watch_consensus_rpcs_sender - .send_replace(Some(Arc::new(new_synced_connections))); + .send_replace(Some(Arc::new(new_consensus))); let backups_voted_str = if backups_needed { "B " } else { "" }; diff --git a/web3_proxy/src/rpcs/consensus.rs b/web3_proxy/src/rpcs/consensus.rs index 5c6bf79e..373a1dd8 100644 --- a/web3_proxy/src/rpcs/consensus.rs +++ b/web3_proxy/src/rpcs/consensus.rs @@ -1,8 +1,7 @@ -use crate::frontend::authorization::Authorization; - use super::blockchain::Web3ProxyBlock; use super::many::Web3Rpcs; use super::one::Web3Rpc; +use crate::frontend::authorization::Authorization; use anyhow::Context; use ethers::prelude::{H256, U64}; use hashbrown::{HashMap, HashSet}; @@ -21,18 +20,22 @@ pub struct ConsensusWeb3Rpcs { // TODO: tier should be an option, or we should have consensus be stored as an Option pub(super) tier: u64, pub(super) head_block: Web3ProxyBlock, + // pub tier: u64, + // pub head_block: Option, // TODO: this should be able to serialize, but it isn't #[serde(skip_serializing)] - pub(super) rpcs: Vec>, - pub(super) backups_voted: Option, - pub(super) backups_needed: bool, + pub rpcs: Vec>, + pub backups_voted: Option, + pub backups_needed: bool, } impl ConsensusWeb3Rpcs { + #[inline(always)] pub fn num_conns(&self) -> usize { self.rpcs.len() } + #[inline(always)] pub fn sum_soft_limit(&self) -> u32 { self.rpcs.iter().fold(0, |sum, rpc| sum + rpc.soft_limit) } @@ -44,9 +47,9 @@ impl fmt::Debug for ConsensusWeb3Rpcs { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // TODO: the default formatter takes forever to write. this is too quiet though // TODO: print the actual conns? - f.debug_struct("ConsensusConnections") + f.debug_struct("ConsensusWeb3Rpcs") .field("head_block", &self.head_block) - .field("num_conns", &self.rpcs.len()) + .field("num_rpcs", &self.rpcs.len()) .finish_non_exhaustive() } } @@ -203,7 +206,7 @@ impl ConnectionsGroup { let mut primary_rpcs_voted: Option = None; let mut backup_rpcs_voted: Option = None; - // track rpcs on this heaviest chain so we can build a new ConsensusConnections + // track rpcs on this heaviest chain so we can build a new ConsensusWeb3Rpcs let mut primary_consensus_rpcs = HashSet::<&str>::new(); let mut backup_consensus_rpcs = HashSet::<&str>::new(); @@ -356,7 +359,7 @@ impl ConnectionsGroup { } } -/// A ConsensusConnections builder that tracks all connection heads across multiple groups of servers +/// A ConsensusWeb3Rpcs builder that tracks all connection heads across multiple groups of servers pub struct ConsensusFinder { /// backups for all tiers are only used if necessary /// tiers[0] = only tier 0. 
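// A minimal sketch of the tokio `watch` pattern used here to publish the new
// consensus: the head-block task swaps the shared value in with
// `send_replace`, and request paths wait on `changed()` when no server is
// usable. `Consensus` is a stand-in for ConsensusWeb3Rpcs.
use std::sync::Arc;
use tokio::sync::watch;

struct Consensus {
    head_block_num: u64,
}

#[tokio::test]
async fn watch_consensus_roundtrip() {
    let (tx, mut rx) = watch::channel::<Option<Arc<Consensus>>>(None);

    // publisher: swap in the freshly built consensus; the old value comes back
    let _old = tx.send_replace(Some(Arc::new(Consensus { head_block_num: 17 })));

    // subscriber: wait for a change, then take a cheap Arc clone of the value
    rx.changed().await.unwrap();
    let current = rx.borrow_and_update().clone();

    assert_eq!(current.unwrap().head_block_num, 17);
}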
diff --git a/web3_proxy/src/rpcs/many.rs b/web3_proxy/src/rpcs/many.rs index 747b17fc..1a1b8354 100644 --- a/web3_proxy/src/rpcs/many.rs +++ b/web3_proxy/src/rpcs/many.rs @@ -2,8 +2,9 @@ use super::blockchain::{BlocksByHashCache, Web3ProxyBlock}; use super::consensus::ConsensusWeb3Rpcs; use super::one::Web3Rpc; -use super::request::{OpenRequestHandle, OpenRequestResult, RequestRevertHandler}; +use super::request::{OpenRequestHandle, OpenRequestResult, RequestErrorHandler}; use crate::app::{flatten_handle, AnyhowJoinHandle, Web3ProxyApp}; +///! Load balanced communication with a group of web3 providers use crate::config::{BlockAndRpc, TxHashAndRpc, Web3RpcConfig}; use crate::frontend::authorization::{Authorization, RequestMetadata}; use crate::frontend::rpc_proxy_ws::ProxyMode; @@ -87,7 +88,12 @@ impl Web3Rpcs { pending_transaction_cache: Cache, pending_tx_sender: Option>, watch_consensus_head_sender: Option>>, - ) -> anyhow::Result<(Arc, AnyhowJoinHandle<()>)> { + ) -> anyhow::Result<( + Arc, + AnyhowJoinHandle<()>, + watch::Receiver>>, + // watch::Receiver>, + )> { let (pending_tx_id_sender, pending_tx_id_receiver) = flume::unbounded(); let (block_sender, block_receiver) = flume::unbounded::(); @@ -161,7 +167,7 @@ impl Web3Rpcs { .max_capacity(10_000) .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); - let (watch_consensus_rpcs_sender, _) = watch::channel(Default::default()); + let (watch_consensus_rpcs_sender, consensus_connections_watcher) = watch::channel(Default::default()); // by_name starts empty. self.apply_server_configs will add to it let by_name = Default::default(); @@ -195,7 +201,7 @@ impl Web3Rpcs { }) }; - Ok((connections, handle)) + Ok((connections, handle, consensus_connections_watcher)) } /// update the rpcs in this group @@ -274,6 +280,10 @@ impl Web3Rpcs { }) .collect(); + // map of connection names to their connection + // let mut connections = HashMap::new(); + // let mut handles = vec![]; + while let Some(x) = spawn_handles.next().await { match x { Ok(Ok((rpc, _handle))) => { @@ -308,8 +318,43 @@ impl Web3Rpcs { } } +// <<<<<<< HEAD Ok(()) } +// ======= +// // TODO: max_capacity and time_to_idle from config +// // all block hashes are the same size, so no need for weigher +// let block_hashes = Cache::builder() +// .time_to_idle(Duration::from_secs(600)) +// .max_capacity(10_000) +// .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); +// // all block numbers are the same size, so no need for weigher +// let block_numbers = Cache::builder() +// .time_to_idle(Duration::from_secs(600)) +// .max_capacity(10_000) +// .build_with_hasher(hashbrown::hash_map::DefaultHashBuilder::default()); +// +// let (watch_consensus_connections_sender, consensus_connections_watcher) = +// watch::channel(Default::default()); +// +// let watch_consensus_head_receiver = +// watch_consensus_head_sender.as_ref().map(|x| x.subscribe()); +// +// let connections = Arc::new(Self { +// by_name: connections, +// watch_consensus_rpcs_sender: watch_consensus_connections_sender, +// watch_consensus_head_receiver, +// pending_transactions, +// block_hashes, +// block_numbers, +// min_sum_soft_limit, +// min_head_rpcs, +// max_block_age, +// max_block_lag, +// }); +// +// let authorization = Arc::new(Authorization::internal(db_conn.clone())?); +// >>>>>>> 77df3fa (stats v2) pub fn get(&self, conn_name: &str) -> Option> { self.by_name.read().get(conn_name).cloned() @@ -319,8 +364,12 @@ impl Web3Rpcs { self.by_name.read().len() } +// <<<<<<< HEAD pub fn 
is_empty(&self) -> bool { self.by_name.read().is_empty() +// ======= +// Ok((connections, handle, consensus_connections_watcher)) +// >>>>>>> 77df3fa (stats v2) } pub fn min_head_rpcs(&self) -> usize { @@ -655,9 +704,7 @@ impl Web3Rpcs { trace!("{} vs {}", rpc_a, rpc_b); // TODO: cached key to save a read lock // TODO: ties to the server with the smallest block_data_limit - let best_rpc = min_by_key(rpc_a, rpc_b, |x| { - OrderedFloat(x.head_latency.read().value()) - }); + let best_rpc = min_by_key(rpc_a, rpc_b, |x| x.peak_ewma()); trace!("winner: {}", best_rpc); // just because it has lower latency doesn't mean we are sure to get a connection @@ -671,7 +718,7 @@ impl Web3Rpcs { } Ok(OpenRequestResult::NotReady) => { // TODO: log a warning? emit a stat? - trace!("best_rpc not ready"); + trace!("best_rpc not ready: {}", best_rpc); } Err(err) => { warn!("No request handle for {}. err={:?}", best_rpc, err) @@ -837,7 +884,11 @@ impl Web3Rpcs { // TODO: maximum retries? right now its the total number of servers loop { +// <<<<<<< HEAD if skip_rpcs.len() >= self.by_name.read().len() { +// ======= +// if skip_rpcs.len() == self.by_name.len() { +// >>>>>>> 77df3fa (stats v2) break; } @@ -854,11 +905,10 @@ impl Web3Rpcs { OpenRequestResult::Handle(active_request_handle) => { // save the rpc in case we get an error and want to retry on another server // TODO: look at backend_requests instead - skip_rpcs.push(active_request_handle.clone_connection()); + let rpc = active_request_handle.clone_connection(); + skip_rpcs.push(rpc.clone()); if let Some(request_metadata) = request_metadata { - let rpc = active_request_handle.clone_connection(); - request_metadata .response_from_backup_rpc .store(rpc.backup, Ordering::Release); @@ -871,7 +921,7 @@ impl Web3Rpcs { .request( &request.method, &json!(request.params), - RequestRevertHandler::Save, + RequestErrorHandler::SaveRevert, None, ) .await; @@ -1109,9 +1159,18 @@ impl Web3Rpcs { request_metadata.no_servers.fetch_add(1, Ordering::Release); } +// <<<<<<< HEAD watch_consensus_rpcs.changed().await?; watch_consensus_rpcs.borrow_and_update(); +// ======= + // TODO: i don't think this will ever happen + // TODO: return a 502? if it does? + // return Err(anyhow::anyhow!("no available rpcs!")); + // TODO: sleep how long? + // TODO: subscribe to something in ConsensusWeb3Rpcs instead + sleep(Duration::from_millis(200)).await; +// >>>>>>> 77df3fa (stats v2) continue; } @@ -1239,13 +1298,14 @@ fn rpc_sync_status_sort_key(x: &Arc) -> (U64, u64, bool, OrderedFloat>>, - /// keep track of hard limits + /// keep track of hard limits. Optional because we skip this code for our own servers. pub(super) hard_limit_until: Option>, /// rate limits are stored in a central redis so that multiple proxies can share their rate limits /// We do not use the deferred rate limiter because going over limits would cause errors @@ -241,8 +242,12 @@ impl Web3Rpc { block_data_limit, reconnect, tier: config.tier, +// <<<<<<< HEAD disconnect_watch: Some(disconnect_sender), created_at: Some(created_at), +// ======= + head_block: RwLock::new(Default::default()), +// >>>>>>> 77df3fa (stats v2) ..Default::default() }; @@ -272,7 +277,7 @@ impl Web3Rpc { Ok((new_connection, handle)) } - pub async fn peak_ewma(&self) -> OrderedFloat { + pub fn peak_ewma(&self) -> OrderedFloat { // TODO: use request instead of head latency? that was killing perf though let head_ewma = self.head_latency.read().value(); @@ -392,6 +397,12 @@ impl Web3Rpc { // this rpc doesn't have that block yet. 
still syncing if needed_block_num > &head_block_num { + trace!( + "{} has head {} but needs {}", + self, + head_block_num, + needed_block_num, + ); return false; } @@ -400,7 +411,17 @@ impl Web3Rpc { let oldest_block_num = head_block_num.saturating_sub(block_data_limit); - *needed_block_num >= oldest_block_num + if needed_block_num < &oldest_block_num { + trace!( + "{} needs {} but the oldest available is {}", + self, + needed_block_num, + oldest_block_num + ); + return false; + } + + true } /// reconnect to the provider. errors are retried forever with exponential backoff with jitter. @@ -439,7 +460,8 @@ impl Web3Rpc { // retry until we succeed while let Err(err) = self.connect(block_sender, chain_id, db_conn).await { - // thread_rng is crytographically secure. we don't need that here + // thread_rng is crytographically secure. we don't need that here. use thread_fast_rng instead + // TODO: min of 1 second? sleep longer if rate limited? sleep_ms = min( cap_ms, thread_fast_rng().gen_range(base_ms..(sleep_ms * range_multiplier)), @@ -455,7 +477,7 @@ impl Web3Rpc { log::log!( error_level, - "Failed reconnect to {}! Retry in {}ms. err={:?}", + "Failed (re)connect to {}! Retry in {}ms. err={:?}", self, retry_in.as_millis(), err, @@ -695,10 +717,10 @@ impl Web3Rpc { http_interval_sender: Option>>, tx_id_sender: Option)>>, ) -> anyhow::Result<()> { - let revert_handler = if self.backup { - RequestRevertHandler::DebugLevel + let error_handler = if self.backup { + RequestErrorHandler::DebugLevel } else { - RequestRevertHandler::ErrorLevel + RequestErrorHandler::ErrorLevel }; loop { @@ -768,7 +790,7 @@ impl Web3Rpc { .wait_for_query::<_, Option>( "eth_getTransactionByHash", &(txid,), - revert_handler, + error_handler, authorization.clone(), Some(client.clone()), ) @@ -805,7 +827,7 @@ impl Web3Rpc { rpc.wait_for_query::<_, Option>( "eth_getCode", &(to, block_number), - revert_handler, + error_handler, authorization.clone(), Some(client), ) @@ -1200,7 +1222,11 @@ impl Web3Rpc { } if let Some(hard_limit_until) = self.hard_limit_until.as_ref() { +// <<<<<<< HEAD let hard_limit_ready = *hard_limit_until.borrow(); +// ======= +// let hard_limit_ready = hard_limit_until.borrow().to_owned(); +// >>>>>>> 77df3fa (stats v2) let now = Instant::now(); @@ -1285,7 +1311,7 @@ impl Web3Rpc { self: &Arc, method: &str, params: &P, - revert_handler: RequestRevertHandler, + revert_handler: RequestErrorHandler, authorization: Arc, unlocked_provider: Option>, ) -> anyhow::Result @@ -1350,7 +1376,7 @@ impl Serialize for Web3Rpc { S: Serializer, { // 3 is the number of fields in the struct. - let mut state = serializer.serialize_struct("Web3Rpc", 10)?; + let mut state = serializer.serialize_struct("Web3Rpc", 9)?; // the url is excluded because it likely includes private information. 
just show the name that we use in keys state.serialize_field("name", &self.name)?; @@ -1414,15 +1440,10 @@ mod tests { #![allow(unused_imports)] use super::*; use ethers::types::{Block, U256}; - use std::time::{SystemTime, UNIX_EPOCH}; #[test] fn test_archive_node_has_block_data() { - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("cannot tell the time") - .as_secs() - .into(); + let now = chrono::Utc::now().timestamp().into(); let random_block = Block { hash: Some(H256::random()), @@ -1457,11 +1478,7 @@ mod tests { #[test] fn test_pruned_node_has_block_data() { - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("cannot tell the time") - .as_secs() - .into(); + let now = chrono::Utc::now().timestamp().into(); let head_block: Web3ProxyBlock = Arc::new(Block { hash: Some(H256::random()), @@ -1498,11 +1515,7 @@ mod tests { // TODO: think about how to bring the concept of a "lagged" node back #[test] fn test_lagged_node_not_has_block_data() { - let now: U256 = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("cannot tell the time") - .as_secs() - .into(); + let now = chrono::Utc::now().timestamp().into(); // head block is an hour old let head_block = Block { @@ -1514,7 +1527,7 @@ mod tests { let head_block = Arc::new(head_block); - let head_block = SavedBlock::new(head_block); + let head_block = Web3ProxyBlock::new(head_block); let block_data_limit = u64::MAX; let metrics = OpenRequestHandleMetrics::default(); diff --git a/web3_proxy/src/rpcs/request.rs b/web3_proxy/src/rpcs/request.rs index 139e3bba..f671626b 100644 --- a/web3_proxy/src/rpcs/request.rs +++ b/web3_proxy/src/rpcs/request.rs @@ -11,6 +11,7 @@ use log::{debug, error, trace, warn, Level}; use migration::sea_orm::{self, ActiveEnum, ActiveModelTrait}; use serde_json::json; use std::fmt; +use std::sync::atomic; use std::sync::Arc; use thread_fast_rng::rand::Rng; use tokio::time::{sleep, Duration, Instant}; @@ -34,7 +35,7 @@ pub struct OpenRequestHandle { /// Depending on the context, RPC errors can require different handling. #[derive(Copy, Clone)] -pub enum RequestRevertHandler { +pub enum RequestErrorHandler { /// Log at the trace level. Use when errors are expected. TraceLevel, /// Log at the debug level. Use when errors are expected. @@ -44,7 +45,7 @@ pub enum RequestRevertHandler { /// Log at the warn level. Use when errors do not cause problems. WarnLevel, /// Potentially save the revert. Users can tune how often this happens - Save, + SaveRevert, } // TODO: second param could be skipped since we don't need it here @@ -57,13 +58,13 @@ struct EthCallFirstParams { data: Option, } -impl From for RequestRevertHandler { +impl From for RequestErrorHandler { fn from(level: Level) -> Self { match level { - Level::Trace => RequestRevertHandler::TraceLevel, - Level::Debug => RequestRevertHandler::DebugLevel, - Level::Error => RequestRevertHandler::ErrorLevel, - Level::Warn => RequestRevertHandler::WarnLevel, + Level::Trace => RequestErrorHandler::TraceLevel, + Level::Debug => RequestErrorHandler::DebugLevel, + Level::Error => RequestErrorHandler::ErrorLevel, + Level::Warn => RequestErrorHandler::WarnLevel, _ => unimplemented!("unexpected tracing Level"), } } @@ -121,11 +122,15 @@ impl Authorization { } impl OpenRequestHandle { - pub async fn new(authorization: Arc, conn: Arc) -> Self { - Self { - authorization, - rpc: conn, - } + pub async fn new(authorization: Arc, rpc: Arc) -> Self { + // TODO: take request_id as an argument? + // TODO: attach a unique id to this? 
customer requests have one, but not internal queries + // TODO: what ordering?! + // TODO: should we be using metered, or not? i think not because we want stats for each handle + // TODO: these should maybe be sent to an influxdb instance? + rpc.active_requests.fetch_add(1, atomic::Ordering::Relaxed); + + Self { authorization, rpc } } pub fn connection_name(&self) -> String { @@ -140,11 +145,12 @@ impl OpenRequestHandle { /// Send a web3 request /// By having the request method here, we ensure that the rate limiter was called and connection counts were properly incremented /// depending on how things are locked, you might need to pass the provider in + /// we take self to ensure this function only runs once pub async fn request( self, method: &str, params: &P, - revert_handler: RequestRevertHandler, + revert_handler: RequestErrorHandler, unlocked_provider: Option>, ) -> Result where @@ -154,7 +160,7 @@ impl OpenRequestHandle { { // TODO: use tracing spans // TODO: including params in this log is way too verbose - // trace!(rpc=%self.conn, %method, "request"); + // trace!(rpc=%self.rpc, %method, "request"); trace!("requesting from {}", self.rpc); let mut provider = if unlocked_provider.is_some() { @@ -209,7 +215,7 @@ impl OpenRequestHandle { // // TODO: i think ethers already has trace logging (and does it much more fancy) // trace!( // "response from {} for {} {:?}: {:?}", - // self.conn, + // self.rpc, // method, // params, // response, @@ -218,17 +224,17 @@ impl OpenRequestHandle { if let Err(err) = &response { // only save reverts for some types of calls // TODO: do something special for eth_sendRawTransaction too - let revert_handler = if let RequestRevertHandler::Save = revert_handler { + let error_handler = if let RequestErrorHandler::SaveRevert = revert_handler { // TODO: should all these be Trace or Debug or a mix? if !["eth_call", "eth_estimateGas"].contains(&method) { // trace!(%method, "skipping save on revert"); - RequestRevertHandler::TraceLevel + RequestErrorHandler::TraceLevel } else if self.authorization.db_conn.is_some() { let log_revert_chance = self.authorization.checks.log_revert_chance; if log_revert_chance == 0.0 { // trace!(%method, "no chance. skipping save on revert"); - RequestRevertHandler::TraceLevel + RequestErrorHandler::TraceLevel } else if log_revert_chance == 1.0 { // trace!(%method, "gaurenteed chance. SAVING on revert"); revert_handler @@ -236,7 +242,7 @@ impl OpenRequestHandle { < log_revert_chance { // trace!(%method, "missed chance. skipping save on revert"); - RequestRevertHandler::TraceLevel + RequestErrorHandler::TraceLevel } else { // trace!("Saving on revert"); // TODO: is always logging at debug level fine? @@ -244,19 +250,22 @@ impl OpenRequestHandle { } } else { // trace!(%method, "no database. skipping save on revert"); - RequestRevertHandler::TraceLevel + RequestErrorHandler::TraceLevel } } else { revert_handler }; - enum ResponseTypes { + // TODO: simple enum -> string derive? 
+ #[derive(Debug)] + enum ResponseErrorType { Revert, RateLimit, - Ok, + Error, } // check for "execution reverted" here + // TODO: move this info a function on ResponseErrorType let response_type = if let ProviderError::JsonRpcClientError(err) = err { // Http and Ws errors are very similar, but different types let msg = match &*provider { @@ -298,87 +307,127 @@ impl OpenRequestHandle { if let Some(msg) = msg { if msg.starts_with("execution reverted") { trace!("revert from {}", self.rpc); - ResponseTypes::Revert + ResponseErrorType::Revert } else if msg.contains("limit") || msg.contains("request") { trace!("rate limit from {}", self.rpc); - ResponseTypes::RateLimit + ResponseErrorType::RateLimit } else { - ResponseTypes::Ok + ResponseErrorType::Error } } else { - ResponseTypes::Ok + ResponseErrorType::Error } } else { - ResponseTypes::Ok + ResponseErrorType::Error }; - if matches!(response_type, ResponseTypes::RateLimit) { - if let Some(hard_limit_until) = self.rpc.hard_limit_until.as_ref() { - let retry_at = Instant::now() + Duration::from_secs(1); + match response_type { + ResponseErrorType::RateLimit => { + if let Some(hard_limit_until) = self.rpc.hard_limit_until.as_ref() { + // TODO: how long? different providers have different rate limiting periods, though most seem to be 1 second + // TODO: until the next second, or wait 1 whole second? + let retry_at = Instant::now() + Duration::from_secs(1); - trace!("retry {} at: {:?}", self.rpc, retry_at); + trace!("retry {} at: {:?}", self.rpc, retry_at); - hard_limit_until.send_replace(retry_at); - } - } - - // TODO: think more about the method and param logs. those can be sensitive information - match revert_handler { - RequestRevertHandler::DebugLevel => { - // TODO: think about this revert check more. sometimes we might want reverts logged so this needs a flag - if matches!(response_type, ResponseTypes::Revert) { - debug!( - "bad response from {}! method={} params={:?} err={:?}", - self.rpc, method, params, err - ); + hard_limit_until.send_replace(retry_at); } } - RequestRevertHandler::TraceLevel => { - trace!( - "bad response from {}! method={} params={:?} err={:?}", - self.rpc, - method, - params, - err - ); + ResponseErrorType::Error => { + // TODO: should we just have Error or RateLimit? do we need Error and Revert separate? + + match error_handler { + RequestErrorHandler::DebugLevel => { + // TODO: include params only if not running in release mode + debug!( + "error response from {}! method={} params={:?} err={:?}", + self.rpc, method, params, err + ); + } + RequestErrorHandler::TraceLevel => { + trace!( + "error response from {}! method={} params={:?} err={:?}", + self.rpc, + method, + params, + err + ); + } + RequestErrorHandler::ErrorLevel => { + // TODO: include params only if not running in release mode + error!( + "error response from {}! method={} err={:?}", + self.rpc, method, err + ); + } + RequestErrorHandler::SaveRevert | RequestErrorHandler::WarnLevel => { + // TODO: include params only if not running in release mode + warn!( + "error response from {}! method={} err={:?}", + self.rpc, method, err + ); + } + } } - RequestRevertHandler::ErrorLevel => { - // TODO: include params if not running in release mode - error!( - "bad response from {}! method={} err={:?}", - self.rpc, method, err - ); - } - RequestRevertHandler::WarnLevel => { - // TODO: include params if not running in release mode - warn!( - "bad response from {}! 
method={} err={:?}", - self.rpc, method, err - ); - } - RequestRevertHandler::Save => { - trace!( - "bad response from {}! method={} params={:?} err={:?}", - self.rpc, - method, - params, - err - ); + ResponseErrorType::Revert => { + match error_handler { + RequestErrorHandler::DebugLevel => { + // TODO: include params only if not running in release mode + debug!( + "revert response from {}! method={} params={:?} err={:?}", + self.rpc, method, params, err + ); + } + RequestErrorHandler::TraceLevel => { + trace!( + "revert response from {}! method={} params={:?} err={:?}", + self.rpc, + method, + params, + err + ); + } + RequestErrorHandler::ErrorLevel => { + // TODO: include params only if not running in release mode + error!( + "revert response from {}! method={} err={:?}", + self.rpc, method, err + ); + } + RequestErrorHandler::WarnLevel => { + // TODO: include params only if not running in release mode + warn!( + "revert response from {}! method={} err={:?}", + self.rpc, method, err + ); + } + RequestErrorHandler::SaveRevert => { + trace!( + "revert response from {}! method={} params={:?} err={:?}", + self.rpc, + method, + params, + err + ); - // TODO: do not unwrap! (doesn't matter much since we check method as a string above) - let method: Method = Method::try_from_value(&method.to_string()).unwrap(); + // TODO: do not unwrap! (doesn't matter much since we check method as a string above) + let method: Method = + Method::try_from_value(&method.to_string()).unwrap(); - // TODO: DO NOT UNWRAP! But also figure out the best way to keep returning ProviderErrors here - let params: EthCallParams = serde_json::from_value(json!(params)) - .context("parsing params to EthCallParams") - .unwrap(); + // TODO: DO NOT UNWRAP! But also figure out the best way to keep returning ProviderErrors here + let params: EthCallParams = serde_json::from_value(json!(params)) + .context("parsing params to EthCallParams") + .unwrap(); - // spawn saving to the database so we don't slow down the request - let f = self.authorization.clone().save_revert(method, params.0 .0); + // spawn saving to the database so we don't slow down the request + let f = self.authorization.clone().save_revert(method, params.0 .0); - tokio::spawn(f); + tokio::spawn(f); + } + } } } + // TODO: track error latency? 
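// A small sketch of the message sniffing that feeds the ResponseErrorType
// match above, pulled out as a standalone helper for clarity. The real code
// first digs `msg` out of the ProviderError / JsonRpcClientError variants;
// the example messages below are made up.
#[derive(Debug, PartialEq)]
enum SketchErrorType {
    Revert,
    RateLimit,
    Error,
}

fn classify_error_message(msg: &str) -> SketchErrorType {
    if msg.starts_with("execution reverted") {
        SketchErrorType::Revert
    } else if msg.contains("limit") || msg.contains("request") {
        // most backends mention "limit" or "request" when they throttle us
        SketchErrorType::RateLimit
    } else {
        SketchErrorType::Error
    }
}

#[test]
fn classify_error_message_examples() {
    assert_eq!(
        classify_error_message("execution reverted: Dai/insufficient-balance"),
        SketchErrorType::Revert
    );
    assert_eq!(
        classify_error_message("daily request count exceeded, request rate limited"),
        SketchErrorType::RateLimit
    );
    assert_eq!(
        classify_error_message("connection reset by peer"),
        SketchErrorType::Error
    );
}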
} else { // TODO: record request latency // let latency_ms = start.elapsed().as_secs_f64() * 1000.0; diff --git a/web3_proxy/src/user_queries.rs b/web3_proxy/src/stats/db_queries.rs similarity index 55% rename from web3_proxy/src/user_queries.rs rename to web3_proxy/src/stats/db_queries.rs index 42bc8893..599b3cff 100644 --- a/web3_proxy/src/user_queries.rs +++ b/web3_proxy/src/stats/db_queries.rs @@ -1,6 +1,9 @@ -use crate::app::DatabaseReplica; +use crate::app::Web3ProxyApp; use crate::frontend::errors::FrontendErrorResponse; -use crate::{app::Web3ProxyApp, user_token::UserBearerToken}; +use crate::http_params::{ + get_chain_id_from_params, get_page_from_params, get_query_start_from_params, + get_query_window_seconds_from_params, get_user_id_from_params, +}; use anyhow::Context; use axum::response::{IntoResponse, Response}; use axum::Json; @@ -8,215 +11,217 @@ use axum::{ headers::{authorization::Bearer, Authorization}, TypedHeader, }; -use chrono::{NaiveDateTime, Utc}; -use entities::{login, rpc_accounting, rpc_key}; +use entities::{rpc_accounting, rpc_key}; use hashbrown::HashMap; use http::StatusCode; -use log::{debug, warn}; +use log::warn; use migration::sea_orm::{ - ColumnTrait, DatabaseConnection, EntityTrait, PaginatorTrait, QueryFilter, QueryOrder, - QuerySelect, Select, + ColumnTrait, EntityTrait, PaginatorTrait, QueryFilter, QueryOrder, QuerySelect, Select, }; use migration::{Condition, Expr, SimpleExpr}; use redis_rate_limiter::redis; -use redis_rate_limiter::{redis::AsyncCommands, RedisConnection}; +use redis_rate_limiter::redis::AsyncCommands; use serde_json::json; -/// get the attached address for the given bearer token. -/// First checks redis. Then checks the database. -/// 0 means all users. -/// This authenticates that the bearer is allowed to view this user_id's stats -pub async fn get_user_id_from_params( - redis_conn: &mut RedisConnection, - db_conn: &DatabaseConnection, - db_replica: &DatabaseReplica, - // this is a long type. should we strip it down? - bearer: Option>>, - params: &HashMap, -) -> Result { - debug!("bearer and params are: {:?} {:?}", bearer, params); - match (bearer, params.get("user_id")) { - (Some(TypedHeader(Authorization(bearer))), Some(user_id)) => { - // check for the bearer cache key - let user_bearer_token = UserBearerToken::try_from(bearer)?; - - let user_redis_key = user_bearer_token.redis_key(); - - let mut save_to_redis = false; - - // get the user id that is attached to this bearer token - let bearer_user_id = match redis_conn.get::<_, u64>(&user_redis_key).await { - Err(_) => { - // TODO: inspect the redis error? if redis is down we should warn - // this also means redis being down will not kill our app. Everything will need a db read query though. - - let user_login = login::Entity::find() - .filter(login::Column::BearerToken.eq(user_bearer_token.uuid())) - .one(db_replica.conn()) - .await - .context("database error while querying for user")? - .ok_or(FrontendErrorResponse::AccessDenied)?; - - // if expired, delete ALL expired logins - let now = Utc::now(); - if now > user_login.expires_at { - // this row is expired! do not allow auth! - // delete ALL expired logins. - let delete_result = login::Entity::delete_many() - .filter(login::Column::ExpiresAt.lte(now)) - .exec(db_conn) - .await?; - - // TODO: emit a stat? 
if this is high something weird might be happening - debug!("cleared expired logins: {:?}", delete_result); - - return Err(FrontendErrorResponse::AccessDenied); - } - - save_to_redis = true; - - user_login.user_id - } - Ok(x) => { - // TODO: push cache ttl further in the future? - x - } - }; - - let user_id: u64 = user_id.parse().context("Parsing user_id param")?; - - if bearer_user_id != user_id { - return Err(FrontendErrorResponse::AccessDenied); - } - - if save_to_redis { - // TODO: how long? we store in database for 4 weeks - const ONE_DAY: usize = 60 * 60 * 24; - - if let Err(err) = redis_conn - .set_ex::<_, _, ()>(user_redis_key, user_id, ONE_DAY) - .await - { - warn!("Unable to save user bearer token to redis: {}", err) - } - } - - Ok(bearer_user_id) - } - (_, None) => { - // they have a bearer token. we don't care about it on public pages - // 0 means all - Ok(0) - } - (None, Some(_)) => { - // they do not have a bearer token, but requested a specific id. block - // TODO: proper error code from a useful error code - // TODO: maybe instead of this sharp edged warn, we have a config value? - // TODO: check config for if we should deny or allow this - Err(FrontendErrorResponse::AccessDenied) - // // TODO: make this a flag - // warn!("allowing without auth during development!"); - // Ok(x.parse()?) - } - } -} - -/// only allow rpc_key to be set if user_id is also set. -/// this will keep people from reading someone else's keys. -/// 0 means none. - -pub fn get_rpc_key_id_from_params( - user_id: u64, - params: &HashMap, -) -> anyhow::Result { - if user_id > 0 { - params.get("rpc_key_id").map_or_else( - || Ok(0), - |c| { - let c = c.parse()?; - - Ok(c) - }, - ) - } else { - Ok(0) - } -} - -pub fn get_chain_id_from_params( - app: &Web3ProxyApp, - params: &HashMap, -) -> anyhow::Result { - params.get("chain_id").map_or_else( - || Ok(app.config.chain_id), - |c| { - let c = c.parse()?; - - Ok(c) - }, - ) -} - -pub fn get_query_start_from_params( - params: &HashMap, -) -> anyhow::Result { - params.get("query_start").map_or_else( - || { - // no timestamp in params. set default - let x = chrono::Utc::now() - chrono::Duration::days(30); - - Ok(x.naive_utc()) - }, - |x: &String| { - // parse the given timestamp - let x = x.parse::().context("parsing timestamp query param")?; - - // TODO: error code 401 - let x = - NaiveDateTime::from_timestamp_opt(x, 0).context("parsing timestamp query param")?; - - Ok(x) - }, - ) -} - -pub fn get_page_from_params(params: &HashMap) -> anyhow::Result { - params.get("page").map_or_else::, _, _>( - || { - // no page in params. set default - Ok(0) - }, - |x: &String| { - // parse the given timestamp - // TODO: error code 401 - let x = x.parse().context("parsing page query from params")?; - - Ok(x) - }, - ) -} - -pub fn get_query_window_seconds_from_params( - params: &HashMap, -) -> Result { - params.get("query_window_seconds").map_or_else( - || { - // no page in params. set default - Ok(0) - }, - |query_window_seconds: &String| { - // parse the given timestamp - // TODO: error code 401 - query_window_seconds.parse::().map_err(|e| { - FrontendErrorResponse::StatusCode( - StatusCode::BAD_REQUEST, - "Unable to parse rpc_key_id".to_string(), - Some(e.into()), - ) - }) - }, - ) -} +// <<<<<<< HEAD:web3_proxy/src/user_queries.rs +// /// get the attached address for the given bearer token. +// /// First checks redis. Then checks the database. +// /// 0 means all users. 
+// /// This authenticates that the bearer is allowed to view this user_id's stats +// pub async fn get_user_id_from_params( +// redis_conn: &mut RedisConnection, +// db_conn: &DatabaseConnection, +// db_replica: &DatabaseReplica, +// // this is a long type. should we strip it down? +// bearer: Option>>, +// params: &HashMap, +// ) -> Result { +// debug!("bearer and params are: {:?} {:?}", bearer, params); +// match (bearer, params.get("user_id")) { +// (Some(TypedHeader(Authorization(bearer))), Some(user_id)) => { +// // check for the bearer cache key +// let user_bearer_token = UserBearerToken::try_from(bearer)?; +// +// let user_redis_key = user_bearer_token.redis_key(); +// +// let mut save_to_redis = false; +// +// // get the user id that is attached to this bearer token +// let bearer_user_id = match redis_conn.get::<_, u64>(&user_redis_key).await { +// Err(_) => { +// // TODO: inspect the redis error? if redis is down we should warn +// // this also means redis being down will not kill our app. Everything will need a db read query though. +// +// let user_login = login::Entity::find() +// .filter(login::Column::BearerToken.eq(user_bearer_token.uuid())) +// .one(db_replica.conn()) +// .await +// .context("database error while querying for user")? +// .ok_or(FrontendErrorResponse::AccessDenied)?; +// +// // if expired, delete ALL expired logins +// let now = Utc::now(); +// if now > user_login.expires_at { +// // this row is expired! do not allow auth! +// // delete ALL expired logins. +// let delete_result = login::Entity::delete_many() +// .filter(login::Column::ExpiresAt.lte(now)) +// .exec(db_conn) +// .await?; +// +// // TODO: emit a stat? if this is high something weird might be happening +// debug!("cleared expired logins: {:?}", delete_result); +// +// return Err(FrontendErrorResponse::AccessDenied); +// } +// +// save_to_redis = true; +// +// user_login.user_id +// } +// Ok(x) => { +// // TODO: push cache ttl further in the future? +// x +// } +// }; +// +// let user_id: u64 = user_id.parse().context("Parsing user_id param")?; +// +// if bearer_user_id != user_id { +// return Err(FrontendErrorResponse::AccessDenied); +// } +// +// if save_to_redis { +// // TODO: how long? we store in database for 4 weeks +// const ONE_DAY: usize = 60 * 60 * 24; +// +// if let Err(err) = redis_conn +// .set_ex::<_, _, ()>(user_redis_key, user_id, ONE_DAY) +// .await +// { +// warn!("Unable to save user bearer token to redis: {}", err) +// } +// } +// +// Ok(bearer_user_id) +// } +// (_, None) => { +// // they have a bearer token. we don't care about it on public pages +// // 0 means all +// Ok(0) +// } +// (None, Some(_)) => { +// // they do not have a bearer token, but requested a specific id. block +// // TODO: proper error code from a useful error code +// // TODO: maybe instead of this sharp edged warn, we have a config value? +// // TODO: check config for if we should deny or allow this +// Err(FrontendErrorResponse::AccessDenied) +// // // TODO: make this a flag +// // warn!("allowing without auth during development!"); +// // Ok(x.parse()?) +// } +// } +// } +// +// /// only allow rpc_key to be set if user_id is also set. +// /// this will keep people from reading someone else's keys. +// /// 0 means none. 
+// +// pub fn get_rpc_key_id_from_params( +// user_id: u64, +// params: &HashMap, +// ) -> anyhow::Result { +// if user_id > 0 { +// params.get("rpc_key_id").map_or_else( +// || Ok(0), +// |c| { +// let c = c.parse()?; +// +// Ok(c) +// }, +// ) +// } else { +// Ok(0) +// } +// } +// +// pub fn get_chain_id_from_params( +// app: &Web3ProxyApp, +// params: &HashMap, +// ) -> anyhow::Result { +// params.get("chain_id").map_or_else( +// || Ok(app.config.chain_id), +// |c| { +// let c = c.parse()?; +// +// Ok(c) +// }, +// ) +// } +// +// pub fn get_query_start_from_params( +// params: &HashMap, +// ) -> anyhow::Result { +// params.get("query_start").map_or_else( +// || { +// // no timestamp in params. set default +// let x = chrono::Utc::now() - chrono::Duration::days(30); +// +// Ok(x.naive_utc()) +// }, +// |x: &String| { +// // parse the given timestamp +// let x = x.parse::().context("parsing timestamp query param")?; +// +// // TODO: error code 401 +// let x = +// NaiveDateTime::from_timestamp_opt(x, 0).context("parsing timestamp query param")?; +// +// Ok(x) +// }, +// ) +// } +// +// pub fn get_page_from_params(params: &HashMap) -> anyhow::Result { +// params.get("page").map_or_else::, _, _>( +// || { +// // no page in params. set default +// Ok(0) +// }, +// |x: &String| { +// // parse the given timestamp +// // TODO: error code 401 +// let x = x.parse().context("parsing page query from params")?; +// +// Ok(x) +// }, +// ) +// } +// +// pub fn get_query_window_seconds_from_params( +// params: &HashMap, +// ) -> Result { +// params.get("query_window_seconds").map_or_else( +// || { +// // no page in params. set default +// Ok(0) +// }, +// |query_window_seconds: &String| { +// // parse the given timestamp +// // TODO: error code 401 +// query_window_seconds.parse::().map_err(|e| { +// FrontendErrorResponse::StatusCode( +// StatusCode::BAD_REQUEST, +// "Unable to parse rpc_key_id".to_string(), +// Some(e.into()), +// ) +// }) +// }, +// ) +// } +// ======= +use super::StatType; +// >>>>>>> 77df3fa (stats v2):web3_proxy/src/stats/db_queries.rs pub fn filter_query_window_seconds( query_window_seconds: u64, @@ -251,16 +256,11 @@ pub fn filter_query_window_seconds( Ok(q) } -pub enum StatResponse { - Aggregated, - Detailed, -} - pub async fn query_user_stats<'a>( app: &'a Web3ProxyApp, bearer: Option>>, params: &'a HashMap, - stat_response_type: StatResponse, + stat_response_type: StatType, ) -> Result { let db_conn = app.db_conn().context("query_user_stats needs a db")?; let db_replica = app @@ -361,7 +361,7 @@ pub async fn query_user_stats<'a>( // TODO: make this and q mutable and clean up the code below. 
no need for more `let q` let mut condition = Condition::all(); - if let StatResponse::Detailed = stat_response_type { + if let StatType::Detailed = stat_response_type { // group by the columns that we use as keys in other places of the code q = q .column(rpc_accounting::Column::ErrorResponse) diff --git a/web3_proxy/src/stats/influxdb_queries.rs b/web3_proxy/src/stats/influxdb_queries.rs new file mode 100644 index 00000000..a5833d44 --- /dev/null +++ b/web3_proxy/src/stats/influxdb_queries.rs @@ -0,0 +1,41 @@ +use super::StatType; +use crate::{ + app::Web3ProxyApp, frontend::errors::FrontendErrorResponse, + http_params::get_user_id_from_params, +}; +use anyhow::Context; +use axum::{ + headers::{authorization::Bearer, Authorization}, + response::Response, + TypedHeader, +}; +use hashbrown::HashMap; + +pub async fn query_user_stats<'a>( + app: &'a Web3ProxyApp, + bearer: Option>>, + params: &'a HashMap, + stat_response_type: StatType, +) -> Result { + let db_conn = app.db_conn().context("query_user_stats needs a db")?; + let db_replica = app + .db_replica() + .context("query_user_stats needs a db replica")?; + let mut redis_conn = app + .redis_conn() + .await + .context("query_user_stats had a redis connection error")? + .context("query_user_stats needs a redis")?; + + // TODO: have a getter for this. do we need a connection pool on it? + let influxdb_client = app + .influxdb_client + .as_ref() + .context("query_user_stats needs an influxdb client")?; + + // get the user id first. if it is 0, we should use a cache on the app + let user_id = + get_user_id_from_params(&mut redis_conn, &db_conn, &db_replica, bearer, params).await?; + + todo!(); +} diff --git a/web3_proxy/src/stats/mod.rs b/web3_proxy/src/stats/mod.rs new file mode 100644 index 00000000..a34959ca --- /dev/null +++ b/web3_proxy/src/stats/mod.rs @@ -0,0 +1,584 @@ +//! Store "stats" in a database for billing and a different database for graphing +//! +//! TODO: move some of these structs/functions into their own file? +pub mod db_queries; +pub mod influxdb_queries; + +use crate::frontend::authorization::{Authorization, RequestMetadata}; +use axum::headers::Origin; +use chrono::{TimeZone, Utc}; +use derive_more::From; +use entities::rpc_accounting_v2; +use entities::sea_orm_active_enums::TrackingLevel; +use futures::stream; +use hashbrown::HashMap; +use influxdb2::api::write::TimestampPrecision; +use influxdb2::models::DataPoint; +use log::{error, info}; +use migration::sea_orm::{self, DatabaseConnection, EntityTrait}; +use migration::{Expr, OnConflict}; +use std::num::NonZeroU64; +use std::sync::atomic::Ordering; +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::broadcast; +use tokio::task::JoinHandle; +use tokio::time::interval; + +pub enum StatType { + Aggregated, + Detailed, +} + +/// TODO: better name? 
+#[derive(Clone, Debug)]
+pub struct RpcQueryStats {
+    authorization: Arc<Authorization>,
+    method: String,
+    archive_request: bool,
+    error_response: bool,
+    request_bytes: u64,
+    /// if backend_requests is 0, there was a cache_hit
+    backend_requests: u64,
+    response_bytes: u64,
+    response_millis: u64,
+    response_timestamp: i64,
+}
+
+#[derive(Clone, From, Hash, PartialEq, Eq)]
+struct RpcQueryKey {
+    /// unix epoch time
+    /// for the time series db, this is (close to) the time that the response was sent
+    /// for the account database, this is rounded to the week
+    response_timestamp: i64,
+    /// true if an archive server was needed to serve the request
+    archive_needed: bool,
+    /// true if the response was some sort of JSONRPC error
+    error_response: bool,
+    /// method tracking is opt-in
+    method: Option<String>,
+    /// origin tracking is opt-in
+    origin: Option<Origin>,
+    /// None if the public url was used
+    rpc_secret_key_id: Option<NonZeroU64>,
+}
+
+/// round the unix epoch time to the start of a period
+fn round_timestamp(timestamp: i64, period_seconds: i64) -> i64 {
+    timestamp / period_seconds * period_seconds
+}
+
+impl RpcQueryStats {
+    /// rpc keys can opt into multiple levels of tracking.
+    /// we always need enough to handle billing, so even the "none" level still has some minimal tracking.
+    /// This "accounting_key" is used in the relational database.
+    /// anonymous users are also saved in the relational database so that the host can do their own cost accounting.
+    fn accounting_key(&self, period_seconds: i64) -> RpcQueryKey {
+        let response_timestamp = round_timestamp(self.response_timestamp, period_seconds);
+
+        let rpc_secret_key_id = self.authorization.checks.rpc_secret_key_id;
+
+        let (method, origin) = match self.authorization.checks.tracking_level {
+            TrackingLevel::None => {
+                // this RPC key requested no tracking. this is the default
+                // do not store the method or the origin
+                (None, None)
+            }
+            TrackingLevel::Aggregated => {
+                // this RPC key requested tracking aggregated across all methods and origins
+                // TODO: think about this more. do we want the origin or not? grouping free cost per site might be useful. i'd rather not collect things if we don't have a planned purpose though
+                let method = None;
+                let origin = None;
+
+                (method, origin)
+            }
+            TrackingLevel::Detailed => {
+                // detailed tracking keeps track of the method and origin
+                // depending on the request, the origin might still be None
+                let method = Some(self.method.clone());
+                let origin = self.authorization.origin.clone();
+
+                (method, origin)
+            }
+        };
+
+        RpcQueryKey {
+            response_timestamp,
+            archive_needed: self.archive_request,
+            error_response: self.error_response,
+            method,
+            rpc_secret_key_id,
+            origin,
+        }
+    }
+
+    /// all queries are aggregated
+    /// TODO: should we store "anon" or "registered" as a key just to be able to split graphs?
+    fn global_timeseries_key(&self) -> RpcQueryKey {
+        let method = Some(self.method.clone());
+        // we don't store origin in the timeseries db. it's only used for optional accounting
+        let origin = None;
+        // everyone gets grouped together
+        let rpc_secret_key_id = None;
+
+        RpcQueryKey {
+            response_timestamp: self.response_timestamp,
+            archive_needed: self.archive_request,
+            error_response: self.error_response,
+            method,
+            rpc_secret_key_id,
+            origin,
+        }
+    }
+
+    fn opt_in_timeseries_key(&self) -> RpcQueryKey {
+        // we don't store origin in the timeseries db. it's only optionally used for accounting
+        let origin = None;
+
+        let (method, rpc_secret_key_id) = match self.authorization.checks.tracking_level {
+            TrackingLevel::None => {
+                // this RPC key requested no tracking. this is the default.
+                // we still want graphs though, so we just use None as the rpc_secret_key_id
+                (Some(self.method.clone()), None)
+            }
+            TrackingLevel::Aggregated => {
+                // this RPC key requested tracking aggregated across all methods
+                (None, self.authorization.checks.rpc_secret_key_id)
+            }
+            TrackingLevel::Detailed => {
+                // detailed tracking keeps track of the method
+                (
+                    Some(self.method.clone()),
+                    self.authorization.checks.rpc_secret_key_id,
+                )
+            }
+        };
+
+        RpcQueryKey {
+            response_timestamp: self.response_timestamp,
+            archive_needed: self.archive_request,
+            error_response: self.error_response,
+            method,
+            rpc_secret_key_id,
+            origin,
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct BufferedRpcQueryStats {
+    frontend_requests: u64,
+    backend_requests: u64,
+    backend_retries: u64,
+    no_servers: u64,
+    cache_misses: u64,
+    cache_hits: u64,
+    sum_request_bytes: u64,
+    sum_response_bytes: u64,
+    sum_response_millis: u64,
+}
+
+/// A stat that we aggregate and then store in a database.
+/// For now there is just one, but I think there might be others later
+#[derive(Debug, From)]
+pub enum AppStat {
+    RpcQuery(RpcQueryStats),
+}
+
+#[derive(From)]
+pub struct SpawnedStatBuffer {
+    pub stat_sender: flume::Sender<AppStat>,
+    /// these handles are important and must be allowed to finish
+    pub background_handle: JoinHandle<anyhow::Result<()>>,
+}
+
+pub struct StatBuffer {
+    chain_id: u64,
+    db_conn: Option<DatabaseConnection>,
+    influxdb_client: Option<influxdb2::Client>,
+    tsdb_save_interval_seconds: u32,
+    db_save_interval_seconds: u32,
+    billing_period_seconds: i64,
+}
+
+impl BufferedRpcQueryStats {
+    fn add(&mut self, stat: RpcQueryStats) {
+        // a stat always comes from just 1 frontend request
+        self.frontend_requests += 1;
+
+        if stat.backend_requests == 0 {
+            // no backend request. cache hit!
+            self.cache_hits += 1;
+        } else {
+            // backend requests! cache miss!
+            self.cache_misses += 1;
+
+            // a single frontend request might have multiple backend requests
+            self.backend_requests += stat.backend_requests;
+        }
+
+        self.sum_request_bytes += stat.request_bytes;
+        self.sum_response_bytes += stat.response_bytes;
+        self.sum_response_millis += stat.response_millis;
+    }
+
+    // TODO: take a db transaction instead so that we can batch?
+ async fn save_db( + self, + chain_id: u64, + db_conn: &DatabaseConnection, + key: RpcQueryKey, + ) -> anyhow::Result<()> { + let period_datetime = Utc.timestamp_opt(key.response_timestamp as i64, 0).unwrap(); + + // this is a lot of variables + let accounting_entry = rpc_accounting_v2::ActiveModel { + id: sea_orm::NotSet, + rpc_key_id: sea_orm::Set(key.rpc_secret_key_id.map(Into::into)), + origin: sea_orm::Set(key.origin.map(|x| x.to_string())), + chain_id: sea_orm::Set(chain_id), + period_datetime: sea_orm::Set(period_datetime), + method: sea_orm::Set(key.method), + archive_needed: sea_orm::Set(key.archive_needed), + error_response: sea_orm::Set(key.error_response), + frontend_requests: sea_orm::Set(self.frontend_requests), + backend_requests: sea_orm::Set(self.backend_requests), + backend_retries: sea_orm::Set(self.backend_retries), + no_servers: sea_orm::Set(self.no_servers), + cache_misses: sea_orm::Set(self.cache_misses), + cache_hits: sea_orm::Set(self.cache_hits), + sum_request_bytes: sea_orm::Set(self.sum_request_bytes), + sum_response_millis: sea_orm::Set(self.sum_response_millis), + sum_response_bytes: sea_orm::Set(self.sum_response_bytes), + }; + + rpc_accounting_v2::Entity::insert(accounting_entry) + .on_conflict( + OnConflict::new() + .values([ + ( + rpc_accounting_v2::Column::FrontendRequests, + Expr::col(rpc_accounting_v2::Column::FrontendRequests) + .add(self.frontend_requests), + ), + ( + rpc_accounting_v2::Column::BackendRequests, + Expr::col(rpc_accounting_v2::Column::BackendRequests) + .add(self.backend_requests), + ), + ( + rpc_accounting_v2::Column::BackendRetries, + Expr::col(rpc_accounting_v2::Column::BackendRetries) + .add(self.backend_retries), + ), + ( + rpc_accounting_v2::Column::NoServers, + Expr::col(rpc_accounting_v2::Column::NoServers).add(self.no_servers), + ), + ( + rpc_accounting_v2::Column::CacheMisses, + Expr::col(rpc_accounting_v2::Column::CacheMisses) + .add(self.cache_misses), + ), + ( + rpc_accounting_v2::Column::CacheHits, + Expr::col(rpc_accounting_v2::Column::CacheHits).add(self.cache_hits), + ), + ( + rpc_accounting_v2::Column::SumRequestBytes, + Expr::col(rpc_accounting_v2::Column::SumRequestBytes) + .add(self.sum_request_bytes), + ), + ( + rpc_accounting_v2::Column::SumResponseMillis, + Expr::col(rpc_accounting_v2::Column::SumResponseMillis) + .add(self.sum_response_millis), + ), + ( + rpc_accounting_v2::Column::SumResponseBytes, + Expr::col(rpc_accounting_v2::Column::SumResponseBytes) + .add(self.sum_response_bytes), + ), + ]) + .to_owned(), + ) + .exec(db_conn) + .await?; + + Ok(()) + } + + // TODO: change this to return a DataPoint? + async fn save_timeseries( + self, + bucket: &str, + measurement: &str, + chain_id: u64, + influxdb2_clent: &influxdb2::Client, + key: RpcQueryKey, + ) -> anyhow::Result<()> { + // TODO: error if key.origin is set? + + // TODO: what name? 
+        let mut builder = DataPoint::builder(measurement);
+
+        builder = builder.tag("chain_id", chain_id.to_string());
+
+        if let Some(rpc_secret_key_id) = key.rpc_secret_key_id {
+            builder = builder.tag("rpc_secret_key_id", rpc_secret_key_id.to_string());
+        }
+
+        if let Some(method) = key.method {
+            builder = builder.tag("method", method);
+        }
+
+        builder = builder
+            .tag("archive_needed", key.archive_needed.to_string())
+            .tag("error_response", key.error_response.to_string())
+            .field("frontend_requests", self.frontend_requests as i64)
+            .field("backend_requests", self.backend_requests as i64)
+            .field("no_servers", self.no_servers as i64)
+            .field("cache_misses", self.cache_misses as i64)
+            .field("cache_hits", self.cache_hits as i64)
+            .field("sum_request_bytes", self.sum_request_bytes as i64)
+            .field("sum_response_millis", self.sum_response_millis as i64)
+            .field("sum_response_bytes", self.sum_response_bytes as i64);
+
+        builder = builder.timestamp(key.response_timestamp);
+        let timestamp_precision = TimestampPrecision::Seconds;
+
+        let points = [builder.build()?];
+
+        // TODO: bucket should be an enum so that we don't risk typos
+        influxdb2_clent
+            .write_with_precision(bucket, stream::iter(points), timestamp_precision)
+            .await?;
+
+        Ok(())
+    }
+}
+
+impl RpcQueryStats {
+    pub fn new(
+        method: String,
+        authorization: Arc<Authorization>,
+        metadata: Arc<RequestMetadata>,
+        response_bytes: usize,
+    ) -> Self {
+        // TODO: try_unwrap the metadata to be sure that all the stats for this request have been collected
+        // TODO: otherwise, i think the whole thing should be in a single lock that we can "reset" when a stat is created
+
+        let archive_request = metadata.archive_request.load(Ordering::Acquire);
+        let backend_requests = metadata.backend_requests.lock().len() as u64;
+        let request_bytes = metadata.request_bytes;
+        let error_response = metadata.error_response.load(Ordering::Acquire);
+        let response_millis = metadata.start_instant.elapsed().as_millis() as u64;
+        let response_bytes = response_bytes as u64;
+
+        let response_timestamp = Utc::now().timestamp();
+
+        Self {
+            authorization,
+            archive_request,
+            method,
+            backend_requests,
+            request_bytes,
+            error_response,
+            response_bytes,
+            response_millis,
+            response_timestamp,
+        }
+    }
+}
+
+impl StatBuffer {
+    pub fn try_spawn(
+        chain_id: u64,
+        db_conn: Option<DatabaseConnection>,
+        influxdb_client: Option<influxdb2::Client>,
+        db_save_interval_seconds: u32,
+        tsdb_save_interval_seconds: u32,
+        billing_period_seconds: i64,
+        shutdown_receiver: broadcast::Receiver<()>,
+    ) -> anyhow::Result<Option<SpawnedStatBuffer>> {
+        if db_conn.is_none() && influxdb_client.is_none() {
+            return Ok(None);
+        }
+
+        let (stat_sender, stat_receiver) = flume::unbounded();
+
+        let mut new = Self {
+            chain_id,
+            db_conn,
+            influxdb_client,
+            db_save_interval_seconds,
+            tsdb_save_interval_seconds,
+            billing_period_seconds,
+        };
+
+        // any errors inside this task will cause the application to exit
+        let handle = tokio::spawn(async move {
+            new.aggregate_and_save_loop(stat_receiver, shutdown_receiver)
+                .await
+        });
+
+        Ok(Some((stat_sender, handle).into()))
+    }
+
+    async fn aggregate_and_save_loop(
+        &mut self,
+        stat_receiver: flume::Receiver<AppStat>,
+        mut shutdown_receiver: broadcast::Receiver<()>,
+    ) -> anyhow::Result<()> {
+        let mut tsdb_save_interval =
+            interval(Duration::from_secs(self.tsdb_save_interval_seconds as u64));
+        let mut db_save_interval =
+            interval(Duration::from_secs(self.db_save_interval_seconds as u64));
+
+        // TODO: this is used for rpc_accounting_v2 and influxdb. give it a name to match that? "stat" of some kind?
+        let mut global_timeseries_buffer = HashMap::<RpcQueryKey, BufferedRpcQueryStats>::new();
+        let mut opt_in_timeseries_buffer = HashMap::<RpcQueryKey, BufferedRpcQueryStats>::new();
+        let mut accounting_db_buffer = HashMap::<RpcQueryKey, BufferedRpcQueryStats>::new();
+
+        loop {
+            tokio::select! {
+                stat = stat_receiver.recv_async() => {
+                    // save the stat to a buffer
+                    match stat {
+                        Ok(AppStat::RpcQuery(stat)) => {
+                            if self.influxdb_client.is_some() {
+                                // TODO: round the timestamp at all?
+
+                                let global_timeseries_key = stat.global_timeseries_key();
+
+                                global_timeseries_buffer.entry(global_timeseries_key).or_default().add(stat.clone());
+
+                                let opt_in_timeseries_key = stat.opt_in_timeseries_key();
+
+                                opt_in_timeseries_buffer.entry(opt_in_timeseries_key).or_default().add(stat.clone());
+                            }
+
+                            if self.db_conn.is_some() {
+                                accounting_db_buffer.entry(stat.accounting_key(self.billing_period_seconds)).or_default().add(stat);
+                            }
+                        }
+                        Err(err) => {
+                            error!("error receiving stat: {:?}", err);
+                            break;
+                        }
+                    }
+                }
+                _ = db_save_interval.tick() => {
+                    let db_conn = self.db_conn.as_ref().expect("db connection should always exist if there are buffered stats");
+
+                    // TODO: batch saves
+                    for (key, stat) in accounting_db_buffer.drain() {
+                        // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now
+                        if let Err(err) = stat.save_db(self.chain_id, db_conn, key).await {
+                            error!("unable to save accounting entry! err={:?}", err);
+                        };
+                    }
+                }
+                _ = tsdb_save_interval.tick() => {
+                    // TODO: batch saves
+                    // TODO: better bucket names
+                    let influxdb_client = self.influxdb_client.as_ref().expect("influxdb client should always exist if there are buffered stats");
+
+                    for (key, stat) in global_timeseries_buffer.drain() {
+                        // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now
+                        if let Err(err) = stat.save_timeseries("dev_web3_proxy", "global_proxy", self.chain_id, influxdb_client, key).await {
+                            error!("unable to save global stat! err={:?}", err);
+                        };
+                    }
+
+                    for (key, stat) in opt_in_timeseries_buffer.drain() {
+                        // TODO: i don't like passing key (which came from the stat) to the function on the stat. but it works for now
+                        if let Err(err) = stat.save_timeseries("dev_web3_proxy", "opt_in_proxy", self.chain_id, influxdb_client, key).await {
+                            error!("unable to save opt-in stat! err={:?}", err);
+                        };
+                    }
+                }
+                x = shutdown_receiver.recv() => {
+                    match x {
+                        Ok(_) => {
+                            info!("stat_loop shutting down");
+                            // TODO: call aggregate_stat for all the
+                        },
+                        Err(err) => error!("stat_loop shutdown receiver err={:?}", err),
+                    }
+                    break;
+                }
+            }
+        }
+
+        // TODO: dry
+        if let Some(db_conn) = self.db_conn.as_ref() {
+            info!(
+                "saving {} buffered accounting entries",
+                accounting_db_buffer.len(),
+            );
+
+            for (key, stat) in accounting_db_buffer.drain() {
+                if let Err(err) = stat.save_db(self.chain_id, db_conn, key).await {
+                    error!(
+                        "Unable to save accounting entry while shutting down! err={:?}",
+                        err
+                    );
+                };
+            }
+        }
+
+        // TODO: dry
+        if let Some(influxdb_client) = self.influxdb_client.as_ref() {
+            info!(
+                "saving {} buffered global stats",
+                global_timeseries_buffer.len(),
+            );
+
+            for (key, stat) in global_timeseries_buffer.drain() {
+                if let Err(err) = stat
+                    .save_timeseries(
+                        "dev_web3_proxy",
+                        "global_proxy",
+                        self.chain_id,
+                        influxdb_client,
+                        key,
+                    )
+                    .await
+                {
+                    error!(
+                        "Unable to save global stat while shutting down! err={:?}",
+                        err
+                    );
+                };
+            }
+
+            info!(
+                "saving {} buffered opt-in stats",
+                opt_in_timeseries_buffer.len(),
+            );
+
+            for (key, stat) in opt_in_timeseries_buffer.drain() {
+                if let Err(err) = stat
+                    .save_timeseries(
+                        "dev_web3_proxy",
+                        "opt_in_proxy",
+                        self.chain_id,
+                        influxdb_client,
+                        key,
+                    )
+                    .await
+                {
+                    error!(
+                        "unable to save opt-in stat while shutting down! err={:?}",
+                        err
+                    );
+                };
+            }
+        }
+
+        info!("accounting and stat save loop complete");
+
+        Ok(())
+    }
+}
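
A minimal usage sketch (not part of the patch) of how the new stats pipeline added in web3_proxy/src/stats/mod.rs is expected to be wired up. The `chain_id`, `db_conn`, `influxdb_client`, `authorization`, `request_metadata`, and `response_bytes` bindings are assumed to come from the surrounding app, and the interval and billing-period values are illustrative placeholders:

    // hypothetical wiring, using only the APIs introduced above
    let (shutdown_sender, shutdown_receiver) = tokio::sync::broadcast::channel(1);

    if let Some(spawned) = StatBuffer::try_spawn(
        chain_id,
        db_conn,          // Option<DatabaseConnection>
        influxdb_client,  // Option<influxdb2::Client>
        60,               // db_save_interval_seconds (placeholder)
        10,               // tsdb_save_interval_seconds (placeholder)
        60 * 60 * 24 * 7, // billing_period_seconds (placeholder: one week)
        shutdown_receiver,
    )? {
        // every proxied request emits one RpcQueryStats
        let stat = RpcQueryStats::new(
            "eth_blockNumber".to_string(),
            authorization.clone(),
            request_metadata.clone(),
            response_bytes,
        );

        // AppStat derives From<RpcQueryStats>, so .into() converts it for the channel
        spawned.stat_sender.send(stat.into())?;

        // on shutdown, signal the loop so the remaining buffers get flushed
        shutdown_sender.send(())?;
        spawned.background_handle.await??;
    }

Because try_spawn returns Ok(None) when neither a database nor an influxdb client is configured, callers can skip stat reporting entirely in that case.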