sentryd to pagerduty
This commit is contained in:
parent
54d190acfc
commit
36d64489d8
1
TODO.md
1
TODO.md
|
@ -318,6 +318,7 @@ These are not yet ordered. There might be duplicates. We might not actually need
|
||||||
- [x] use channels instead of arcswap
|
- [x] use channels instead of arcswap
|
||||||
- this will let us easily wait for a new head or a new synced connection
|
- this will let us easily wait for a new head or a new synced connection
|
||||||
- [x] broadcast transactions to more servers
|
- [x] broadcast transactions to more servers
|
||||||
|
- [x] send sentryd errors to pagerduty
|
||||||
- [-] proxy mode for benchmarking all backends
|
- [-] proxy mode for benchmarking all backends
|
||||||
- [-] proxy mode for sending to multiple backends
|
- [-] proxy mode for sending to multiple backends
|
||||||
- [-] let users choose a % of reverts to log (or maybe x/second). someone like curve logging all reverts will be a BIG database very quickly
|
- [-] let users choose a % of reverts to log (or maybe x/second). someone like curve logging all reverts will be a BIG database very quickly
|
||||||
|
|
|
@ -368,7 +368,7 @@ fn main() -> anyhow::Result<()> {
|
||||||
warn!("sentry_url is not set! Logs will only show in this console");
|
warn!("sentry_url is not set! Logs will only show in this console");
|
||||||
}
|
}
|
||||||
|
|
||||||
x.main().await
|
x.main(pagerduty_async).await
|
||||||
}
|
}
|
||||||
SubCommand::RpcAccounting(x) => {
|
SubCommand::RpcAccounting(x) => {
|
||||||
let db_url = cli_config
|
let db_url = cli_config
|
||||||
|
|
|
@ -3,7 +3,7 @@ use log::{error, info};
|
||||||
use pagerduty_rs::{eventsv2async::EventsV2 as PagerdutyAsyncEventsV2, types::Event};
|
use pagerduty_rs::{eventsv2async::EventsV2 as PagerdutyAsyncEventsV2, types::Event};
|
||||||
use web3_proxy::{
|
use web3_proxy::{
|
||||||
config::TopConfig,
|
config::TopConfig,
|
||||||
pagerduty::{pagerduty_event_for_config, trigger_pagerduty_alert},
|
pagerduty::{pagerduty_alert, pagerduty_event_for_config},
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(FromArgs, PartialEq, Debug, Eq)]
|
#[derive(FromArgs, PartialEq, Debug, Eq)]
|
||||||
|
@ -40,30 +40,33 @@ impl PagerdutySubCommand {
|
||||||
pagerduty_async: Option<PagerdutyAsyncEventsV2>,
|
pagerduty_async: Option<PagerdutyAsyncEventsV2>,
|
||||||
top_config: Option<TopConfig>,
|
top_config: Option<TopConfig>,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
|
// TODO: allow customizing severity
|
||||||
let event = top_config
|
let event = top_config
|
||||||
.map(|top_config| {
|
.map(|top_config| {
|
||||||
pagerduty_event_for_config(
|
pagerduty_event_for_config(
|
||||||
top_config,
|
|
||||||
self.class.clone(),
|
self.class.clone(),
|
||||||
self.component.clone(),
|
self.component.clone(),
|
||||||
|
None::<()>,
|
||||||
Some(self.group.clone()),
|
Some(self.group.clone()),
|
||||||
|
pagerduty_rs::types::Severity::Error,
|
||||||
self.summary.clone(),
|
self.summary.clone(),
|
||||||
None,
|
None,
|
||||||
None::<()>,
|
top_config,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap_or_else(|| {
|
.unwrap_or_else(|| {
|
||||||
trigger_pagerduty_alert(
|
pagerduty_alert(
|
||||||
"web3-proxy".to_string(),
|
|
||||||
None,
|
None,
|
||||||
self.class,
|
self.class,
|
||||||
|
"web3-proxy".to_string(),
|
||||||
None,
|
None,
|
||||||
self.component,
|
self.component,
|
||||||
|
None::<()>,
|
||||||
Some(self.group),
|
Some(self.group),
|
||||||
|
pagerduty_rs::types::Severity::Error,
|
||||||
None,
|
None,
|
||||||
self.summary,
|
self.summary,
|
||||||
None,
|
None,
|
||||||
None::<()>,
|
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
|
@ -6,9 +6,12 @@ use futures::{
|
||||||
stream::{FuturesUnordered, StreamExt},
|
stream::{FuturesUnordered, StreamExt},
|
||||||
Future,
|
Future,
|
||||||
};
|
};
|
||||||
|
use log::{error, info};
|
||||||
|
use pagerduty_rs::{eventsv2async::EventsV2 as PagerdutyAsyncEventsV2, types::Event};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use tokio::time::{interval, MissedTickBehavior};
|
use tokio::time::{interval, MissedTickBehavior};
|
||||||
|
use web3_proxy::pagerduty::pagerduty_alert;
|
||||||
|
|
||||||
#[derive(FromArgs, PartialEq, Debug, Eq)]
|
#[derive(FromArgs, PartialEq, Debug, Eq)]
|
||||||
/// Loop healthchecks and send pager duty alerts if any fail
|
/// Loop healthchecks and send pager duty alerts if any fail
|
||||||
|
@ -39,8 +42,15 @@ pub struct SentrydSubCommand {
|
||||||
seconds: Option<u64>,
|
seconds: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A failed check, as sent over the error channel from a check loop to the error handler task.
#[derive(Debug)]
|
||||||
|
struct Error {
|
||||||
|
/// Label of the check that failed; the handler passes it on to `pagerduty_alert`.
class: String,
|
||||||
|
/// Level the failure is logged at; the handler only builds an alert when this is `log::Level::Error`.
level: log::Level,
|
||||||
|
/// The error returned by the failed check; the handler formats it with `Display` for the alert.
anyhow: anyhow::Error,
|
||||||
|
}
|
||||||
|
|
||||||
impl SentrydSubCommand {
|
impl SentrydSubCommand {
|
||||||
pub async fn main(self) -> anyhow::Result<()> {
|
pub async fn main(self, pagerduty_async: Option<PagerdutyAsyncEventsV2>) -> anyhow::Result<()> {
|
||||||
// sentry logging should already be configured
|
// sentry logging should already be configured
|
||||||
|
|
||||||
let seconds = self.seconds.unwrap_or(60);
|
let seconds = self.seconds.unwrap_or(60);
|
||||||
|
@ -48,15 +58,44 @@ impl SentrydSubCommand {
|
||||||
let mut handles = FuturesUnordered::new();
|
let mut handles = FuturesUnordered::new();
|
||||||
|
|
||||||
// channels and a task for sending errors to logs/pagerduty
|
// channels and a task for sending errors to logs/pagerduty
|
||||||
let (error_sender, mut error_receiver) = mpsc::channel::<(log::Level, anyhow::Error)>(10);
|
let (error_sender, mut error_receiver) = mpsc::channel::<Error>(10);
|
||||||
|
|
||||||
{
|
{
|
||||||
let error_handler_f = async move {
|
let error_handler_f = async move {
|
||||||
while let Some((error_level, err)) = error_receiver.recv().await {
|
if pagerduty_async.is_none() {
|
||||||
log::log!(error_level, "check failed: {:?}", err);
|
info!("set PAGERDUTY_INTEGRATION_KEY to send create alerts for errors");
|
||||||
|
}
|
||||||
|
|
||||||
if matches!(error_level, log::Level::Error) {
|
while let Some(err) = error_receiver.recv().await {
|
||||||
todo!("send to pager duty if pager duty exists");
|
log::log!(err.level, "check failed: {:?}", err);
|
||||||
|
|
||||||
|
if matches!(err.level, log::Level::Error) {
|
||||||
|
let alert = pagerduty_alert(
|
||||||
|
None,
|
||||||
|
Some(err.class),
|
||||||
|
"web3-proxy-sentry".to_string(),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None::<()>,
|
||||||
|
Some("web3-proxy-sentry".to_string()),
|
||||||
|
pagerduty_rs::types::Severity::Error,
|
||||||
|
None,
|
||||||
|
format!("{}", err.anyhow),
|
||||||
|
None,
|
||||||
|
);
|
||||||
|
|
||||||
|
if let Some(pagerduty_async) = pagerduty_async.as_ref() {
|
||||||
|
info!(
|
||||||
|
"sending to pagerduty: {}",
|
||||||
|
serde_json::to_string_pretty(&alert)?
|
||||||
|
);
|
||||||
|
|
||||||
|
if let Err(err) =
|
||||||
|
pagerduty_async.event(Event::AlertTrigger(alert)).await
|
||||||
|
{
|
||||||
|
error!("Failed sending to pagerduty: {}", err);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -73,9 +112,13 @@ impl SentrydSubCommand {
|
||||||
let url = format!("{}/health", self.web3_proxy);
|
let url = format!("{}/health", self.web3_proxy);
|
||||||
let error_sender = error_sender.clone();
|
let error_sender = error_sender.clone();
|
||||||
|
|
||||||
let loop_f = a_loop(seconds, log::Level::Error, error_sender, move || {
|
let loop_f = a_loop(
|
||||||
simple::main(url.clone())
|
"main /health",
|
||||||
});
|
seconds,
|
||||||
|
log::Level::Error,
|
||||||
|
error_sender,
|
||||||
|
move || simple::main(url.clone()),
|
||||||
|
);
|
||||||
|
|
||||||
handles.push(tokio::spawn(loop_f));
|
handles.push(tokio::spawn(loop_f));
|
||||||
}
|
}
|
||||||
|
@ -84,9 +127,13 @@ impl SentrydSubCommand {
|
||||||
let url = format!("{}/health", other_web3_proxy);
|
let url = format!("{}/health", other_web3_proxy);
|
||||||
let error_sender = error_sender.clone();
|
let error_sender = error_sender.clone();
|
||||||
|
|
||||||
let loop_f = a_loop(seconds, log::Level::Warn, error_sender, move || {
|
let loop_f = a_loop(
|
||||||
simple::main(url.clone())
|
"other /health",
|
||||||
});
|
seconds,
|
||||||
|
log::Level::Warn,
|
||||||
|
error_sender,
|
||||||
|
move || simple::main(url.clone()),
|
||||||
|
);
|
||||||
|
|
||||||
handles.push(tokio::spawn(loop_f));
|
handles.push(tokio::spawn(loop_f));
|
||||||
}
|
}
|
||||||
|
@ -102,9 +149,13 @@ impl SentrydSubCommand {
|
||||||
|
|
||||||
others.extend(self.other_rpc.clone());
|
others.extend(self.other_rpc.clone());
|
||||||
|
|
||||||
let loop_f = a_loop(seconds, log::Level::Error, error_sender, move || {
|
let loop_f = a_loop(
|
||||||
compare::main(rpc.clone(), others.clone(), max_age, max_lag)
|
"head block comparison",
|
||||||
});
|
seconds,
|
||||||
|
log::Level::Error,
|
||||||
|
error_sender,
|
||||||
|
move || compare::main(rpc.clone(), others.clone(), max_age, max_lag),
|
||||||
|
);
|
||||||
|
|
||||||
handles.push(tokio::spawn(loop_f));
|
handles.push(tokio::spawn(loop_f));
|
||||||
}
|
}
|
||||||
|
@ -120,9 +171,10 @@ impl SentrydSubCommand {
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn a_loop<T>(
|
async fn a_loop<T>(
|
||||||
|
class: &str,
|
||||||
seconds: u64,
|
seconds: u64,
|
||||||
error_level: log::Level,
|
error_level: log::Level,
|
||||||
error_sender: mpsc::Sender<(log::Level, anyhow::Error)>,
|
error_sender: mpsc::Sender<Error>,
|
||||||
f: impl Fn() -> T,
|
f: impl Fn() -> T,
|
||||||
) -> anyhow::Result<()>
|
) -> anyhow::Result<()>
|
||||||
where
|
where
|
||||||
|
@ -137,7 +189,13 @@ where
|
||||||
interval.tick().await;
|
interval.tick().await;
|
||||||
|
|
||||||
if let Err(err) = f().await {
|
if let Err(err) = f().await {
|
||||||
error_sender.send((error_level, err)).await?;
|
let err = Error {
|
||||||
|
class: class.to_string(),
|
||||||
|
level: error_level,
|
||||||
|
anyhow: err,
|
||||||
|
};
|
||||||
|
|
||||||
|
error_sender.send(err).await?;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue