sentryd to pagerduty
This commit is contained in:
parent
54d190acfc
commit
36d64489d8
1
TODO.md
1
TODO.md
@ -318,6 +318,7 @@ These are not yet ordered. There might be duplicates. We might not actually need
|
||||
- [x] use channels instead of arcswap
|
||||
- this will let us easily wait for a new head or a new synced connection
|
||||
- [x] broadcast transactions to more servers
|
||||
- [x] send sentryd errors to pagerduty
|
||||
- [-] proxy mode for benchmarking all backends
|
||||
- [-] proxy mode for sending to multiple backends
|
||||
- [-] let users choose a % of reverts to log (or maybe x/second). if someone like curve logs all reverts, the database will get BIG very quickly
|
||||
|
@ -368,7 +368,7 @@ fn main() -> anyhow::Result<()> {
|
||||
warn!("sentry_url is not set! Logs will only show in this console");
|
||||
}
|
||||
|
||||
x.main().await
|
||||
x.main(pagerduty_async).await
|
||||
}
|
||||
SubCommand::RpcAccounting(x) => {
|
||||
let db_url = cli_config
|
||||
|
@ -3,7 +3,7 @@ use log::{error, info};
|
||||
use pagerduty_rs::{eventsv2async::EventsV2 as PagerdutyAsyncEventsV2, types::Event};
|
||||
use web3_proxy::{
|
||||
config::TopConfig,
|
||||
pagerduty::{pagerduty_event_for_config, trigger_pagerduty_alert},
|
||||
pagerduty::{pagerduty_alert, pagerduty_event_for_config},
|
||||
};
|
||||
|
||||
#[derive(FromArgs, PartialEq, Debug, Eq)]
|
||||
@ -40,30 +40,33 @@ impl PagerdutySubCommand {
|
||||
pagerduty_async: Option<PagerdutyAsyncEventsV2>,
|
||||
top_config: Option<TopConfig>,
|
||||
) -> anyhow::Result<()> {
|
||||
// TODO: allow customizing severity
|
||||
let event = top_config
|
||||
.map(|top_config| {
|
||||
pagerduty_event_for_config(
|
||||
top_config,
|
||||
self.class.clone(),
|
||||
self.component.clone(),
|
||||
None::<()>,
|
||||
Some(self.group.clone()),
|
||||
pagerduty_rs::types::Severity::Error,
|
||||
self.summary.clone(),
|
||||
None,
|
||||
None::<()>,
|
||||
top_config,
|
||||
)
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
trigger_pagerduty_alert(
|
||||
"web3-proxy".to_string(),
|
||||
pagerduty_alert(
|
||||
None,
|
||||
self.class,
|
||||
"web3-proxy".to_string(),
|
||||
None,
|
||||
self.component,
|
||||
None::<()>,
|
||||
Some(self.group),
|
||||
pagerduty_rs::types::Severity::Error,
|
||||
None,
|
||||
self.summary,
|
||||
None,
|
||||
None::<()>,
|
||||
)
|
||||
});
|
||||
|
||||
|
@ -6,9 +6,12 @@ use futures::{
|
||||
stream::{FuturesUnordered, StreamExt},
|
||||
Future,
|
||||
};
|
||||
use log::{error, info};
|
||||
use pagerduty_rs::{eventsv2async::EventsV2 as PagerdutyAsyncEventsV2, types::Event};
|
||||
use std::time::Duration;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::time::{interval, MissedTickBehavior};
|
||||
use web3_proxy::pagerduty::pagerduty_alert;
|
||||
|
||||
#[derive(FromArgs, PartialEq, Debug, Eq)]
|
||||
/// Loop healthchecks and send pager duty alerts if any fail
|
||||
@ -39,8 +42,15 @@ pub struct SentrydSubCommand {
|
||||
seconds: Option<u64>,
|
||||
}
|
||||
|
||||
/// A failed health check, sent over the error channel from a check loop to
/// the task that logs failures and raises PagerDuty alerts.
#[derive(Debug)]
|
||||
struct Error {
|
||||
/// Label of the check that failed (the loops pass names such as
/// "main /health"); forwarded to the PagerDuty alert.
class: String,
|
||||
/// Level used when logging the failure; only `log::Level::Error` failures
/// are turned into PagerDuty alerts.
level: log::Level,
|
||||
/// The error returned by the failed check; its `Display` text is used when
/// building the alert.
anyhow: anyhow::Error,
|
||||
}
|
||||
|
||||
impl SentrydSubCommand {
|
||||
pub async fn main(self) -> anyhow::Result<()> {
|
||||
pub async fn main(self, pagerduty_async: Option<PagerdutyAsyncEventsV2>) -> anyhow::Result<()> {
|
||||
// sentry logging should already be configured
|
||||
|
||||
let seconds = self.seconds.unwrap_or(60);
|
||||
@ -48,15 +58,44 @@ impl SentrydSubCommand {
|
||||
let mut handles = FuturesUnordered::new();
|
||||
|
||||
// channels and a task for sending errors to logs/pagerduty
|
||||
let (error_sender, mut error_receiver) = mpsc::channel::<(log::Level, anyhow::Error)>(10);
|
||||
let (error_sender, mut error_receiver) = mpsc::channel::<Error>(10);
|
||||
|
||||
{
|
||||
let error_handler_f = async move {
|
||||
while let Some((error_level, err)) = error_receiver.recv().await {
|
||||
log::log!(error_level, "check failed: {:?}", err);
|
||||
if pagerduty_async.is_none() {
|
||||
info!("set PAGERDUTY_INTEGRATION_KEY to send create alerts for errors");
|
||||
}
|
||||
|
||||
if matches!(error_level, log::Level::Error) {
|
||||
todo!("send to pager duty if pager duty exists");
|
||||
while let Some(err) = error_receiver.recv().await {
|
||||
log::log!(err.level, "check failed: {:?}", err);
|
||||
|
||||
if matches!(err.level, log::Level::Error) {
|
||||
let alert = pagerduty_alert(
|
||||
None,
|
||||
Some(err.class),
|
||||
"web3-proxy-sentry".to_string(),
|
||||
None,
|
||||
None,
|
||||
None::<()>,
|
||||
Some("web3-proxy-sentry".to_string()),
|
||||
pagerduty_rs::types::Severity::Error,
|
||||
None,
|
||||
format!("{}", err.anyhow),
|
||||
None,
|
||||
);
|
||||
|
||||
if let Some(pagerduty_async) = pagerduty_async.as_ref() {
|
||||
info!(
|
||||
"sending to pagerduty: {}",
|
||||
serde_json::to_string_pretty(&alert)?
|
||||
);
|
||||
|
||||
if let Err(err) =
|
||||
pagerduty_async.event(Event::AlertTrigger(alert)).await
|
||||
{
|
||||
error!("Failed sending to pagerduty: {}", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -73,9 +112,13 @@ impl SentrydSubCommand {
|
||||
let url = format!("{}/health", self.web3_proxy);
|
||||
let error_sender = error_sender.clone();
|
||||
|
||||
let loop_f = a_loop(seconds, log::Level::Error, error_sender, move || {
|
||||
simple::main(url.clone())
|
||||
});
|
||||
let loop_f = a_loop(
|
||||
"main /health",
|
||||
seconds,
|
||||
log::Level::Error,
|
||||
error_sender,
|
||||
move || simple::main(url.clone()),
|
||||
);
|
||||
|
||||
handles.push(tokio::spawn(loop_f));
|
||||
}
|
||||
@ -84,9 +127,13 @@ impl SentrydSubCommand {
|
||||
let url = format!("{}/health", other_web3_proxy);
|
||||
let error_sender = error_sender.clone();
|
||||
|
||||
let loop_f = a_loop(seconds, log::Level::Warn, error_sender, move || {
|
||||
simple::main(url.clone())
|
||||
});
|
||||
let loop_f = a_loop(
|
||||
"other /health",
|
||||
seconds,
|
||||
log::Level::Warn,
|
||||
error_sender,
|
||||
move || simple::main(url.clone()),
|
||||
);
|
||||
|
||||
handles.push(tokio::spawn(loop_f));
|
||||
}
|
||||
@ -102,9 +149,13 @@ impl SentrydSubCommand {
|
||||
|
||||
others.extend(self.other_rpc.clone());
|
||||
|
||||
let loop_f = a_loop(seconds, log::Level::Error, error_sender, move || {
|
||||
compare::main(rpc.clone(), others.clone(), max_age, max_lag)
|
||||
});
|
||||
let loop_f = a_loop(
|
||||
"head block comparison",
|
||||
seconds,
|
||||
log::Level::Error,
|
||||
error_sender,
|
||||
move || compare::main(rpc.clone(), others.clone(), max_age, max_lag),
|
||||
);
|
||||
|
||||
handles.push(tokio::spawn(loop_f));
|
||||
}
|
||||
@ -120,9 +171,10 @@ impl SentrydSubCommand {
|
||||
}
|
||||
|
||||
async fn a_loop<T>(
|
||||
class: &str,
|
||||
seconds: u64,
|
||||
error_level: log::Level,
|
||||
error_sender: mpsc::Sender<(log::Level, anyhow::Error)>,
|
||||
error_sender: mpsc::Sender<Error>,
|
||||
f: impl Fn() -> T,
|
||||
) -> anyhow::Result<()>
|
||||
where
|
||||
@ -137,7 +189,13 @@ where
|
||||
interval.tick().await;
|
||||
|
||||
if let Err(err) = f().await {
|
||||
error_sender.send((error_level, err)).await?;
|
||||
let err = Error {
|
||||
class: class.to_string(),
|
||||
level: error_level,
|
||||
anyhow: err,
|
||||
};
|
||||
|
||||
error_sender.send(err).await?;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user