Skip to content

Commit

Permalink
Fix rhelemeter rules' service label value (#627)
Browse files: browse the repository at this point in the history
  • Loading branch information
douglascamata authored Oct 18, 2023
1 parent 9979266 commit 629510c
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 40 deletions.
6 changes: 4 additions & 2 deletions configuration/observatorium/slo.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,10 @@ func TelemeterReceiveSLOs(envName rhobsInstanceEnv, instanceName string) []pyrra
{
name: fmt.Sprintf("rhobs-%s-server-metrics-receive-availability-slo", strings.ToLower(instanceName)),
labels: map[string]string{
"route": fmt.Sprintf("%s-server-receive", strings.ToLower(instanceName)),
slo.PropagationLabelsPrefix + "service": strings.ToLower(instanceName),
"route": fmt.Sprintf("%s-server-receive", strings.ToLower(instanceName)),
// This has to match a service known by app-interface, so we can't use the instance name because Rhelemeter
// is part of the Telemeter service definition for now.
slo.PropagationLabelsPrefix + "service": "telemeter",
},
description: fmt.Sprintf("%s Server /receive is burning too much error budget to guarantee availability SLOs.", instanceName),
successOrErrorsExpr: fmt.Sprintf("haproxy_server_http_responses_total{route=\"%s-server-metrics-v1-receive\", code=~\"5..\"}", strings.ToLower(instanceName)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#RhelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning
creationTimestamp: null
labels:
pyrra.dev/service: rhelemeter
pyrra.dev/service: telemeter
route: rhelemeter-server-receive
name: rhobs-rhelemeter-server-metrics-receive-availability-slo
spec:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ metadata:
pyrra.dev/runbook: https://github.com/rhobs/configuration/blob/main/docs/sop/observatorium.md#RhelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning
creationTimestamp: null
labels:
pyrra.dev/service: rhelemeter
pyrra.dev/service: telemeter
route: rhelemeter-server-receive
name: rhobs-rhelemeter-server-metrics-receive-availability-slo
spec:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
- expr: sum by(code) (increase(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[4w]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:increase4w
- alert: SLOMetricAbsent
Expand All @@ -30,7 +30,7 @@ spec:
for: 2m
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
severity: medium
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
- interval: 30s
Expand All @@ -40,49 +40,49 @@ spec:
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[5m]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate5m
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[30m]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[30m]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate30m
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[1h]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[1h]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate1h
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[2h]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[2h]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate2h
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[6h]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[6h]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate6h
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[1d]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[1d]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate1d
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[4d]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[4d]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate4d
- alert: RhelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning
Expand All @@ -99,7 +99,7 @@ spec:
exhaustion: 2d
long_burnrate_window: 1h
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
severity: high
short_burnrate_window: 5m
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
Expand All @@ -117,7 +117,7 @@ spec:
exhaustion: 4d
long_burnrate_window: 6h
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
severity: high
short_burnrate_window: 30m
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
Expand All @@ -135,7 +135,7 @@ spec:
exhaustion: 2w
long_burnrate_window: 1d
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
severity: medium
short_burnrate_window: 2h
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
Expand All @@ -153,7 +153,7 @@ spec:
exhaustion: 4w
long_burnrate_window: 4d
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
severity: medium
short_burnrate_window: 6h
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
Expand All @@ -162,29 +162,29 @@ spec:
rules:
- expr: "0.99"
labels:
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: pyrra_objective
- expr: 2419200
labels:
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: pyrra_window
- expr: 1 - sum(haproxy_server_http_responses:increase4w{code=~"5..",route="rhelemeter-server-metrics-v1-receive",slo="rhobs-rhelemeter-server-metrics-receive-availability-slo"}
or vector(0)) / sum(haproxy_server_http_responses:increase4w{route="rhelemeter-server-metrics-v1-receive",slo="rhobs-rhelemeter-server-metrics-receive-availability-slo"})
labels:
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: pyrra_availability
- expr: sum(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"})
labels:
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: pyrra_requests_total
- expr: sum(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}
or vector(0))
labels:
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: pyrra_errors_total
- interval: 2m30s
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
- expr: sum by(code) (increase(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[4w]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:increase4w
- alert: SLOMetricAbsent
Expand All @@ -30,7 +30,7 @@ spec:
for: 2m
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
severity: medium
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
- interval: 30s
Expand All @@ -40,49 +40,49 @@ spec:
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[5m]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate5m
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[30m]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[30m]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate30m
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[1h]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[1h]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate1h
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[2h]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[2h]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate2h
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[6h]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[6h]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate6h
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[1d]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[1d]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate1d
- expr: sum(rate(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}[4d]))
/ sum(rate(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"}[4d]))
labels:
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: haproxy_server_http_responses:burnrate4d
- alert: RhelemeterServerMetricsReceiveWriteAvailabilityErrorBudgetBurning
Expand All @@ -99,7 +99,7 @@ spec:
exhaustion: 2d
long_burnrate_window: 1h
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
severity: high
short_burnrate_window: 5m
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
Expand All @@ -117,7 +117,7 @@ spec:
exhaustion: 4d
long_burnrate_window: 6h
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
severity: high
short_burnrate_window: 30m
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
Expand All @@ -135,7 +135,7 @@ spec:
exhaustion: 2w
long_burnrate_window: 1d
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
severity: medium
short_burnrate_window: 2h
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
Expand All @@ -153,7 +153,7 @@ spec:
exhaustion: 4w
long_burnrate_window: 4d
route: rhelemeter-server-metrics-v1-receive
service: rhelemeter
service: telemeter
severity: medium
short_burnrate_window: 6h
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
Expand All @@ -162,29 +162,29 @@ spec:
rules:
- expr: "0.99"
labels:
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: pyrra_objective
- expr: 2419200
labels:
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: pyrra_window
- expr: 1 - sum(haproxy_server_http_responses:increase4w{code=~"5..",route="rhelemeter-server-metrics-v1-receive",slo="rhobs-rhelemeter-server-metrics-receive-availability-slo"}
or vector(0)) / sum(haproxy_server_http_responses:increase4w{route="rhelemeter-server-metrics-v1-receive",slo="rhobs-rhelemeter-server-metrics-receive-availability-slo"})
labels:
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: pyrra_availability
- expr: sum(haproxy_server_http_responses_total{route="rhelemeter-server-metrics-v1-receive"})
labels:
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: pyrra_requests_total
- expr: sum(haproxy_server_http_responses_total{code=~"5..",route="rhelemeter-server-metrics-v1-receive"}
or vector(0))
labels:
service: rhelemeter
service: telemeter
slo: rhobs-rhelemeter-server-metrics-receive-availability-slo
record: pyrra_errors_total
- interval: 2m30s
Expand Down

0 comments on commit 629510c

Please sign in to comment.