From 904377a8dc83c1210b0ff75956a4a874bf56efe8 Mon Sep 17 00:00:00 2001 From: Ashley James Date: Fri, 17 Nov 2023 19:12:50 +0530 Subject: [PATCH 1/3] fix(alert): Ignore IPMI slot connector alerts. Fixes: #107 --- src/prometheus_alert_rules/ipmi_sensors.yaml | 6 +++--- tests/unit/test_alert_rules/test_ipmi_sensors.yaml | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/prometheus_alert_rules/ipmi_sensors.yaml b/src/prometheus_alert_rules/ipmi_sensors.yaml index 93ebf02e..d00731b4 100644 --- a/src/prometheus_alert_rules/ipmi_sensors.yaml +++ b/src/prometheus_alert_rules/ipmi_sensors.yaml @@ -75,13 +75,13 @@ groups: LABELS = {{ $labels }} - alert: IPMISensorStateNotOk - expr: ipmi_generic_sensor_value{state=~"Warning|Critical", type!="Entity Presence"} + expr: ipmi_generic_sensor_value{state=~"Warning|Critical", type!="Entity Presence", type!="Slot/Connector"} for: 0m - labels: + labels: severity: "{{ toLower $labels.state }}" annotations: summary: IPMI sensor value in {{ toLower $labels.state }} state. (instance {{ $labels.instance }}) description: | - A sensor value, recorded by ipmi sensor, in {{ toLower $labels.state }} state. Entity Presence sensors are ignored. + A sensor value, recorded by ipmi sensor, in {{ toLower $labels.state }} state. Entity Presence and Slot Connector sensors are ignored. VALUE = {{ $value }} LABELS = {{ $labels }} diff --git a/tests/unit/test_alert_rules/test_ipmi_sensors.yaml b/tests/unit/test_alert_rules/test_ipmi_sensors.yaml index c58e8c7b..fd882a23 100644 --- a/tests/unit/test_alert_rules/test_ipmi_sensors.yaml +++ b/tests/unit/test_alert_rules/test_ipmi_sensors.yaml @@ -228,7 +228,7 @@ tests: exp_annotations: summary: IPMI sensor value in warning state. (instance ubuntu-11) description: | - A sensor value, recorded by ipmi sensor, in warning state. Entity Presence sensors are ignored. + A sensor value, recorded by ipmi sensor, in warning state. Entity Presence and Slot Connector sensors are ignored. VALUE = 20 LABELS = map[__name__:ipmi_generic_sensor_value instance:ubuntu-11 state:Warning] - exp_labels: @@ -238,6 +238,6 @@ tests: exp_annotations: summary: IPMI sensor value in critical state. (instance ubuntu-12) description: | - A sensor value, recorded by ipmi sensor, in critical state. Entity Presence sensors are ignored. + A sensor value, recorded by ipmi sensor, in critical state. Entity Presence and Slot Connector sensors are ignored. VALUE = 50 LABELS = map[__name__:ipmi_generic_sensor_value instance:ubuntu-12 state:Critical] From 138ab173c75e516201f7054f2579a4f1abe83a90 Mon Sep 17 00:00:00 2001 From: Ashley James Date: Sat, 18 Nov 2023 10:44:27 +0530 Subject: [PATCH 2/3] Add input series for slot connector in tests. Also reduce alert expr by removing duplicate type. --- src/prometheus_alert_rules/ipmi_sensors.yaml | 2 +- tests/unit/test_alert_rules/test_ipmi_sensors.yaml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/prometheus_alert_rules/ipmi_sensors.yaml b/src/prometheus_alert_rules/ipmi_sensors.yaml index d00731b4..6052bf3c 100644 --- a/src/prometheus_alert_rules/ipmi_sensors.yaml +++ b/src/prometheus_alert_rules/ipmi_sensors.yaml @@ -75,7 +75,7 @@ groups: LABELS = {{ $labels }} - alert: IPMISensorStateNotOk - expr: ipmi_generic_sensor_value{state=~"Warning|Critical", type!="Entity Presence", type!="Slot/Connector"} + expr: ipmi_generic_sensor_value{state=~"Warning|Critical", type!~"Entity\\sPresence|Slot/Connector"} for: 0m labels: severity: "{{ toLower $labels.state }}" diff --git a/tests/unit/test_alert_rules/test_ipmi_sensors.yaml b/tests/unit/test_alert_rules/test_ipmi_sensors.yaml index fd882a23..84611240 100644 --- a/tests/unit/test_alert_rules/test_ipmi_sensors.yaml +++ b/tests/unit/test_alert_rules/test_ipmi_sensors.yaml @@ -216,6 +216,8 @@ tests: values: '50x15' - series: ipmi_generic_sensor_value{state="Critical", instance="ubuntu-12", type="Entity Presence"} values: '50x15' + - series: ipmi_generic_sensor_value{state="Critical", instance="ubuntu-12", type="Slot/Connector"} + values: '50x15' alert_rule_test: - eval_time: 0m From 190529b5b4adca0e04ef1ce78afb020678a47900 Mon Sep 17 00:00:00 2001 From: Ashley James Date: Mon, 20 Nov 2023 12:03:53 +0530 Subject: [PATCH 3/3] Add comment to IPMI sensor state alert. --- src/prometheus_alert_rules/ipmi_sensors.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/prometheus_alert_rules/ipmi_sensors.yaml b/src/prometheus_alert_rules/ipmi_sensors.yaml index 6052bf3c..81e626ef 100644 --- a/src/prometheus_alert_rules/ipmi_sensors.yaml +++ b/src/prometheus_alert_rules/ipmi_sensors.yaml @@ -74,6 +74,8 @@ groups: FAN_SPEED_RPM = {{ $value }} LABELS = {{ $labels }} + # Entity Presence sensors are ignored since the state doesn't correspond to a real alert + # Slot Connector sensors are ignored since they raise a high number of false positive alerts - alert: IPMISensorStateNotOk expr: ipmi_generic_sensor_value{state=~"Warning|Critical", type!~"Entity\\sPresence|Slot/Connector"} for: 0m