send panics to pagerduty

This commit is contained in:
Bryan Stitt 2023-01-24 00:05:31 -08:00
parent c1e81089b1
commit 776bcd149f
4 changed files with 272 additions and 6 deletions

191
Cargo.lock generated

@ -948,6 +948,16 @@ dependencies = [
"unicode-segmentation",
]
[[package]]
name = "core-foundation"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.3"
@ -1834,6 +1844,21 @@ version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "foreign-types"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
dependencies = [
"foreign-types-shared",
]
[[package]]
name = "foreign-types-shared"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
[[package]]
name = "form_urlencoded"
version = "1.1.0"
@ -2045,6 +2070,16 @@ dependencies = [
"version_check",
]
[[package]]
name = "gethostname"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a329e22866dd78b35d2c639a4a23d7b950aeae300dfd79f4fb19f74055c2404"
dependencies = [
"libc",
"windows",
]
[[package]]
name = "getrandom"
version = "0.2.6"
@ -2344,6 +2379,19 @@ dependencies = [
"tokio-rustls",
]
[[package]]
name = "hyper-tls"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
dependencies = [
"bytes",
"hyper",
"native-tls",
"tokio",
"tokio-native-tls",
]
[[package]]
name = "iana-time-zone"
version = "0.1.46"
@ -2819,6 +2867,24 @@ dependencies = [
"getrandom",
]
[[package]]
name = "native-tls"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
dependencies = [
"lazy_static",
"libc",
"log",
"openssl",
"openssl-probe",
"openssl-sys",
"schannel",
"security-framework",
"security-framework-sys",
"tempfile",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
@ -3031,6 +3097,51 @@ dependencies = [
"syn",
]
[[package]]
name = "openssl"
version = "0.10.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b102428fd03bc5edf97f62620f7298614c45cedf287c271e7ed450bbaf83f2e1"
dependencies = [
"bitflags",
"cfg-if",
"foreign-types",
"libc",
"once_cell",
"openssl-macros",
"openssl-sys",
]
[[package]]
name = "openssl-macros"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "openssl-probe"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-sys"
version = "0.9.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23bbbf7854cd45b83958ebe919f0e8e516793727652e27fda10a8384cfc790b7"
dependencies = [
"autocfg",
"cc",
"libc",
"pkg-config",
"vcpkg",
]
[[package]]
name = "os_info"
version = "3.5.1"
@ -3078,6 +3189,18 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "pagerduty-rs"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd10bab2b6df910bbe6c4987d76aa4221235103d9a9c000cfabcee6a6abc8f7a"
dependencies = [
"reqwest",
"serde",
"time 0.3.17",
"url",
]
[[package]]
name = "parity-scale-codec"
version = "3.1.2"
@ -3711,10 +3834,12 @@ dependencies = [
"http-body",
"hyper",
"hyper-rustls",
"hyper-tls",
"ipnet",
"js-sys",
"log",
"mime",
"native-tls",
"once_cell",
"percent-encoding",
"pin-project-lite",
@ -3724,6 +3849,7 @@ dependencies = [
"serde_json",
"serde_urlencoded",
"tokio",
"tokio-native-tls",
"tokio-rustls",
"tower-service",
"url",
@ -3943,6 +4069,15 @@ dependencies = [
"syn",
]
[[package]]
name = "schannel"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "713cfb06c7059f3588fb8044c0fad1d09e3c01d225e25b9220dbfdcf16dbb1b3"
dependencies = [
"windows-sys 0.42.0",
]
[[package]]
name = "scheduled-thread-pool"
version = "0.2.6"
@ -4154,6 +4289,29 @@ dependencies = [
"zeroize",
]
[[package]]
name = "security-framework"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "645926f31b250a2dca3c232496c2d898d91036e45ca0e97e0e2390c54e11be36"
dependencies = [
"bitflags",
"core-foundation",
"core-foundation-sys",
"libc",
"security-framework-sys",
]
[[package]]
name = "security-framework-sys"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31c9bb296072e961fcbd8853511dd39c2d8be2deb1e17c6860b1d30732b323b4"
dependencies = [
"core-foundation-sys",
"libc",
]
[[package]]
name = "semver"
version = "1.0.14"
@ -5018,6 +5176,16 @@ dependencies = [
"syn",
]
[[package]]
name = "tokio-native-tls"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b"
dependencies = [
"native-tls",
"tokio",
]
[[package]]
name = "tokio-rustls"
version = "0.23.4"
@ -5427,6 +5595,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "vcpkg"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "version_check"
version = "0.9.4"
@ -5583,6 +5757,7 @@ dependencies = [
"fdlimit",
"flume",
"futures",
"gethostname",
"glob",
"handlebars",
"hashbrown 0.13.2",
@ -5597,6 +5772,7 @@ dependencies = [
"notify",
"num",
"num-traits",
"pagerduty-rs",
"parking_lot 0.12.1",
"proctitle",
"redis-rate-limiter",
@ -5679,6 +5855,21 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows"
version = "0.43.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04662ed0e3e5630dfa9b26e4cb823b817f1a9addda855d973a9458c236556244"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc 0.42.0",
"windows_i686_gnu 0.42.0",
"windows_i686_msvc 0.42.0",
"windows_x86_64_gnu 0.42.0",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc 0.42.0",
]
[[package]]
name = "windows-sys"
version = "0.36.1"

@ -39,6 +39,7 @@ ethers = { version = "1.0.2", default-features = false, features = ["rustls", "w
fdlimit = "0.2.1"
flume = "0.10.14"
futures = { version = "0.3.25", features = ["thread-pool"] }
gethostname = "0.4.1"
glob = "0.3.1"
handlebars = "4.3.6"
hashbrown = { version = "0.13.2", features = ["serde"] }
@ -52,6 +53,7 @@ moka = { version = "0.9.6", default-features = false, features = ["future"] }
notify = "5.0.0"
num = "0.4.0"
num-traits = "0.2.15"
pagerduty-rs = { version = "0.1.6", features = ["async", "sync"] }
parking_lot = { version = "0.12.1", features = ["arc_lock"] }
proctitle = "0.1.1"
regex = "1.7.1"

@ -8,6 +8,7 @@ mod create_user;
mod daemon;
mod drop_migration_lock;
mod list_user_tier;
mod pagerduty;
mod rpc_accounting;
mod sentryd;
mod transfer_key;
@ -17,9 +18,13 @@ mod user_import;
use anyhow::Context;
use argh::FromArgs;
use ethers::types::U256;
use log::{info, warn};
use gethostname::gethostname;
use log::{error, info, warn};
use pagerduty_rs::eventsv2sync::EventsV2 as PagerdutySyncEventsV2;
use pagerduty_rs::types::{AlertTrigger, AlertTriggerPayload};
use pagerduty_rs::{eventsv2async::EventsV2 as PagerdutyAsyncEventsV2, types::Event};
use std::{
fs,
fs, panic,
path::Path,
sync::atomic::{self, AtomicUsize},
};
@ -71,6 +76,7 @@ enum SubCommand {
CountUsers(count_users::CountUsersSubCommand),
CreateUser(create_user::CreateUserSubCommand),
DropMigrationLock(drop_migration_lock::DropMigrationLockSubCommand),
Pagerduty(pagerduty::PagerdutySubCommand),
Proxyd(daemon::ProxydSubCommand),
RpcAccounting(rpc_accounting::RpcAccountingSubCommand),
Sentryd(sentryd::SentrydSubCommand),
@ -191,6 +197,70 @@ fn main() -> anyhow::Result<()> {
info!("{}", APP_USER_AGENT);
// optionally connect to pagerduty
// TODO: fix this nested result
let (pagerduty_async, pagerduty_sync) = if let Ok(pagerduty_key) =
std::env::var("PAGERDUTY_INTEGRATION_KEY")
{
let pagerduty_async =
PagerdutyAsyncEventsV2::new(pagerduty_key.clone(), Some(APP_USER_AGENT.to_string()))?;
let pagerduty_sync =
PagerdutySyncEventsV2::new(pagerduty_key, Some(APP_USER_AGENT.to_string()))?;
(Some(pagerduty_async), Some(pagerduty_sync))
} else {
info!("No PAGERDUTY_INTEGRATION_KEY");
(None, None)
};
// panic handler that sends to pagerduty
// TODO: there is a `pagerduty_panic` module that looks like it would work with minor tweaks, but ethers-rs panics when a websocket exits and that would fire too many alerts
if let Some(pagerduty_sync) = pagerduty_sync {
let client = top_config
.as_ref()
.map(|top_config| format!("web3-proxy chain #{}", top_config.app.chain_id))
.unwrap_or_else(|| format!("web3-proxy w/o chain"));
let client_url = top_config
.as_ref()
.and_then(|x| x.app.redirect_public_url.clone());
panic::set_hook(Box::new(move |x| {
let hostname = gethostname().into_string().unwrap_or("unknown".to_string());
let panic_msg = format!("{} {:?}", x, x);
error!("sending panic to pagerduty: {}", panic_msg);
let payload = AlertTriggerPayload {
severity: pagerduty_rs::types::Severity::Error,
summary: panic_msg.clone(),
source: hostname,
timestamp: None,
component: None,
group: Some("web3-proxy".to_string()),
class: Some("panic".to_string()),
custom_details: None::<()>,
};
let event = Event::AlertTrigger(AlertTrigger {
payload,
dedup_key: None,
images: None,
links: None,
client: Some(client.clone()),
client_url: client_url.clone(),
});
if let Err(err) = pagerduty_sync.event(event) {
error!("Failed sending panic to pagerduty: {}", err);
}
}));
} else {
info!("No pagerduty key. Using default panic handler");
}
// set up tokio's async runtime
let mut rt_builder = runtime::Builder::new_multi_thread();
@ -286,6 +356,13 @@ fn main() -> anyhow::Result<()> {
x.main(&db_conn).await
}
SubCommand::Pagerduty(x) => {
if cli_config.sentry_url.is_none() {
warn!("sentry_url is not set! Logs will only show in this console");
}
x.main(pagerduty_async, top_config).await
}
SubCommand::Sentryd(x) => {
if cli_config.sentry_url.is_none() {
warn!("sentry_url is not set! Logs will only show in this console");

@ -13,10 +13,6 @@ use tokio::time::{interval, MissedTickBehavior};
/// Loop healthchecks and send pager duty alerts if any fail
#[argh(subcommand, name = "sentryd")]
pub struct SentrydSubCommand {
#[argh(positional)]
/// a descriptive name for this node (probably the hostname)
location: String,
#[argh(positional)]
/// the main (HTTP only) web3-proxy being checked.
web3_proxy: String,