Enter standby with the longer of the requested interval and the task's own interval.

collector.go: the ErrNoInstance, ErrNoMetric, ErrPermissionDenied and
ErrAPIRequestRejected branches of AbstractCollector.Start call
Schedule.SetStandByModeMax instead of Schedule.SetStandByMode.

schedule.go: add Schedule.SetStandByModeMax, which behaves like the visible
part of SetStandByMode but stores max(i, t.interval) as the task interval;
also reword the SetStandByMode doc comment.

diff --git a/cmd/poller/collector/collector.go b/cmd/poller/collector/collector.go
index b87dcfe40..0350f00a1 100644
--- a/cmd/poller/collector/collector.go
+++ b/cmd/poller/collector/collector.go
@@ -335,7 +335,7 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) {
 					c.SetStatus(1, errs.ErrConnection.Error())
 				// there are no instances to collect
 				case errors.Is(err, errs.ErrNoInstance):
-					c.Schedule.SetStandByMode(task, 5*time.Minute)
+					c.Schedule.SetStandByModeMax(task, 5*time.Minute)
 					c.SetStatus(1, errs.ErrNoInstance.Error())
 					c.Logger.Info().
 						Str("task", task.Name).
@@ -343,7 +343,7 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) {
 				// no metrics available
 				case errors.Is(err, errs.ErrNoMetric):
 					c.SetStatus(1, errs.ErrNoMetric.Error())
-					c.Schedule.SetStandByMode(task, 1*time.Hour)
+					c.Schedule.SetStandByModeMax(task, 1*time.Hour)
 					c.Logger.Info().
 						Str("task", task.Name).
 						Str("object", c.Object).
@@ -351,11 +351,11 @@ func (c *AbstractCollector) Start(wg *sync.WaitGroup) {
 				// not an error we are expecting, so enter failed or standby state
 				default:
 					if errors.Is(err, errs.ErrPermissionDenied) {
-						c.Schedule.SetStandByMode(task, 1*time.Hour)
+						c.Schedule.SetStandByModeMax(task, 1*time.Hour)
 						c.Logger.Error().Err(err).Str("task", task.Name).Msg("Entering standby mode")
 					} else if errors.Is(err, errs.ErrAPIRequestRejected) {
 						// API was rejected, this happens when a resource is not available or does not exist
-						c.Schedule.SetStandByMode(task, 1*time.Hour)
+						c.Schedule.SetStandByModeMax(task, 1*time.Hour)
 						// Log as info since some of these aren't errors
 						c.Logger.Info().Err(err).Str("task", task.Name).Msg("Entering standby mode")
 					} else {
diff --git a/cmd/poller/schedule/schedule.go b/cmd/poller/schedule/schedule.go
index bffdb9c7a..cbdac8c4e 100644
--- a/cmd/poller/schedule/schedule.go
+++ b/cmd/poller/schedule/schedule.go
@@ -109,9 +109,25 @@ func (s *Schedule) IsTaskStandBy(t *Task) bool {
 	return t.Name == s.standByTask.Name
 }
 
+// SetStandByModeMax initializes StandbyMode: Schedule will suspend all tasks until
+// the critical task t has succeeded. The amount of time to standby will be the maximum of the task's current interval
+// and i. That interval will be used for the task until the Schedule recovers to normal mode.
+func (s *Schedule) SetStandByModeMax(t *Task, i time.Duration) {
+	for _, x := range s.tasks {
+		if x.Name == t.Name {
+			s.standByTask = t
+			t.interval = max(i, t.interval)
+			t.timer = time.Now()
+			s.standByMode = true
+			return
+		}
+	}
+	panic("invalid task: " + t.Name)
+}
+
 // SetStandByMode initializes StandbyMode: Schedule will suspend all tasks until
 // the critical task t has succeeded. The temporary interval i will be used for
-// the task until Schedule recovers to normal mode.
+// the task until the Schedule recovers to normal mode.
 func (s *Schedule) SetStandByMode(t *Task, i time.Duration) {
 	for _, x := range s.tasks {
 		if x.Name == t.Name {