diff --git a/docs/node-observ-lib/linux/alerts.libsonnet b/docs/node-observ-lib/linux/alerts.libsonnet index 00d33d3d10..8cc89d8fdf 100644 --- a/docs/node-observ-lib/linux/alerts.libsonnet +++ b/docs/node-observ-lib/linux/alerts.libsonnet @@ -414,6 +414,20 @@ description: 'Systemd service {{ $labels.name }} has entered failed state at {{ $labels.instance }}', }, }, + { + alert: 'NodeSystemdServiceCrashlooping', + expr: ||| + increase(node_systemd_service_restart_total{%(filteringSelector)s}[5m]) > 2 + ||| % this.config, + 'for': '15m', + labels: { + severity: 'warning', + }, + annotations: { + summary: 'Systemd service keeps restarting, possibly crash looping.', + description: 'Systemd service {{ $labels.name }} has been restarted too many times at {{ $labels.instance }} for the last 15 minutes. Please check if service is crash looping.', + }, + }, ] + if this.config.enableHardware then [{