From fbf16eb6df338a0a13b98ea7c2e82f8f79435fc1 Mon Sep 17 00:00:00 2001 From: Peter-Jan Brone Date: Tue, 30 Jul 2024 13:48:35 +0200 Subject: [PATCH] Use constant id for health refresh failure alerts (#1402) There's not much of an upside to using a random id for health refresh failure alerts. This PR dismisses the alert when a health refresh succeeds, ensuring these critical alerts auto-dismiss once the refresh recovers. Bit of a trade-off obviously, but I like this better (?). --- autopilot/alerts.go | 3 ++- autopilot/migrator.go | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/autopilot/alerts.go b/autopilot/alerts.go index 5777748d2..1d089c39d 100644 --- a/autopilot/alerts.go +++ b/autopilot/alerts.go @@ -14,6 +14,7 @@ import ( var ( alertAccountRefillID = alerts.RandomAlertID() // constant until restarted + alertHealthRefreshID = alerts.RandomAlertID() // constant until restarted alertLowBalanceID = alerts.RandomAlertID() // constant until restarted alertMigrationID = alerts.RandomAlertID() // constant until restarted alertPruningID = alerts.RandomAlertID() // constant until restarted @@ -166,7 +167,7 @@ func newMigrationFailedAlert(slabKey object.EncryptionKey, health float64, objec func newRefreshHealthFailedAlert(err error) alerts.Alert { return alerts.Alert{ - ID: alerts.RandomAlertID(), + ID: alertHealthRefreshID, Severity: alerts.SeverityCritical, Message: "Health refresh failed", Data: map[string]interface{}{ diff --git a/autopilot/migrator.go b/autopilot/migrator.go index cf4195a89..08c0a9c93 100644 --- a/autopilot/migrator.go +++ b/autopilot/migrator.go @@ -268,6 +268,7 @@ OUTER: m.ap.RegisterAlert(m.ap.shutdownCtx, newRefreshHealthFailedAlert(err)) m.logger.Errorf("failed to recompute cached health before migration: %v", err) } else { + m.ap.DismissAlert(m.ap.shutdownCtx, alertHealthRefreshID) m.logger.Infof("recomputed slab health in %v", time.Since(start)) updateToMigrate() }