Skip to content

Commit

Permalink
fix: scheduled job always reported as success even when failing
Browse files Browse the repository at this point in the history
For scheduled jobs, the returned "success" value is always true, even when
the executed states have failed.

However, the retcode seems sufficient to catch these failures.
  • Loading branch information
kpetremann committed Apr 14, 2023
1 parent eb06224 commit 465d27f
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"github.com/rs/zerolog/log"
)

func ExposeMetrics(ctx context.Context, eventChan chan events.SaltEvent) {
func ExposeMetrics(ctx context.Context, eventChan <-chan events.SaltEvent) {
newJobCounter := promauto.NewCounterVec(
prometheus.CounterOpts{
Name: "salt_new_job_total",
Expand Down Expand Up @@ -65,10 +65,15 @@ func ExposeMetrics(ctx context.Context, eventChan chan events.SaltEvent) {
case "ret":
state := event.ExtractState()
if event.IsScheduleJob {
// for scheduled job, when the states in the job actually failed
// - the global "success" value is always true
// - the substate success is false, and the global retcode is > 0
// using retcode could be enough, but in case there are other corner cases, we combine both values
success := event.Data.Success && (event.Data.Retcode == 0)
scheduledJobReturnCounter.WithLabelValues(
event.Data.Fun,
state,
strconv.FormatBool(event.Data.Success),
strconv.FormatBool(success),
).Inc()
} else {
sucess := strconv.FormatBool(event.Data.Success)
Expand Down

0 comments on commit 465d27f

Please sign in to comment.