From fbf16eb6df338a0a13b98ea7c2e82f8f79435fc1 Mon Sep 17 00:00:00 2001
From: Peter-Jan Brone <peterjan.brone@gmail.com>
Date: Tue, 30 Jul 2024 13:48:35 +0200
Subject: [PATCH] Use constant id for health refresh failure alerts (#1402)

There's not much of an upside to using a random id for health refresh
failure alerts. This PR uses a constant id instead and dismisses the alert
when a health refresh succeeds, ensuring these critical alerts auto-dismiss
once a later refresh succeeds. Bit of a trade-off obviously but I like this
better (?).
---
 autopilot/alerts.go   | 3 ++-
 autopilot/migrator.go | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/autopilot/alerts.go b/autopilot/alerts.go
index 5777748d2..1d089c39d 100644
--- a/autopilot/alerts.go
+++ b/autopilot/alerts.go
@@ -14,6 +14,7 @@ import (
 
 var (
 	alertAccountRefillID = alerts.RandomAlertID() // constant until restarted
+	alertHealthRefreshID = alerts.RandomAlertID() // constant until restarted
 	alertLowBalanceID    = alerts.RandomAlertID() // constant until restarted
 	alertMigrationID     = alerts.RandomAlertID() // constant until restarted
 	alertPruningID       = alerts.RandomAlertID() // constant until restarted
@@ -166,7 +167,7 @@ func newMigrationFailedAlert(slabKey object.EncryptionKey, health float64, objec
 
 func newRefreshHealthFailedAlert(err error) alerts.Alert {
 	return alerts.Alert{
-		ID:       alerts.RandomAlertID(),
+		ID:       alertHealthRefreshID,
 		Severity: alerts.SeverityCritical,
 		Message:  "Health refresh failed",
 		Data: map[string]interface{}{
diff --git a/autopilot/migrator.go b/autopilot/migrator.go
index cf4195a89..08c0a9c93 100644
--- a/autopilot/migrator.go
+++ b/autopilot/migrator.go
@@ -268,6 +268,7 @@ OUTER:
 			m.ap.RegisterAlert(m.ap.shutdownCtx, newRefreshHealthFailedAlert(err))
 			m.logger.Errorf("failed to recompute cached health before migration: %v", err)
 		} else {
+			m.ap.DismissAlert(m.ap.shutdownCtx, alertHealthRefreshID)
 			m.logger.Infof("recomputed slab health in %v", time.Since(start))
 			updateToMigrate()
 		}