From 64575c0005775741a7e41997d0b82c351e12cab2 Mon Sep 17 00:00:00 2001 From: Chris Schinnerl Date: Mon, 29 Jul 2024 16:16:14 +0200 Subject: [PATCH] autopilot: address comments --- autopilot/alerts.go | 3 +-- autopilot/migrator.go | 48 +++++++++++++++++++++++-------------------- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/autopilot/alerts.go b/autopilot/alerts.go index ba34cb5b5..5777748d2 100644 --- a/autopilot/alerts.go +++ b/autopilot/alerts.go @@ -170,8 +170,7 @@ func newRefreshHealthFailedAlert(err error) alerts.Alert { Severity: alerts.SeverityCritical, Message: "Health refresh failed", Data: map[string]interface{}{ - "migrationsInterrupted": false, - "error": err.Error(), + "error": err.Error(), }, Timestamp: time.Now(), } diff --git a/autopilot/migrator.go b/autopilot/migrator.go index 25225f592..cf4195a89 100644 --- a/autopilot/migrator.go +++ b/autopilot/migrator.go @@ -206,28 +206,20 @@ func (m *migrator) performMigrations(p *workerPool) { default: } -OUTER: - for { - // fetch currently configured set - autopilot, err := m.ap.Config(m.ap.shutdownCtx) - if err != nil { - m.logger.Errorf("failed to fetch autopilot config: %w", err) - return - } - set := autopilot.Config.Contracts.Set - if set == "" { - m.logger.Error("could not perform migrations, no contract set configured") - return - } - - // recompute health. - start := time.Now() - if err := b.RefreshHealth(m.ap.shutdownCtx); err != nil { - m.ap.RegisterAlert(m.ap.shutdownCtx, newRefreshHealthFailedAlert(err)) - m.logger.Errorf("failed to recompute cached health before migration: %v", err) - } - m.logger.Infof("recomputed slab health in %v", time.Since(start)) + // fetch currently configured set + autopilot, err := m.ap.Config(m.ap.shutdownCtx) + if err != nil { + m.logger.Errorf("failed to fetch autopilot config: %w", err) + return + } + set := autopilot.Config.Contracts.Set + if set == "" { + m.logger.Error("could not perform migrations, no contract set configured") + return + } + // helper to update 'toMigrate' + updateToMigrate := func() { // fetch slabs for migration toMigrateNew, err := b.SlabsForMigration(m.ap.shutdownCtx, m.healthCutoff, set, migratorBatchSize) if err != nil { @@ -266,7 +258,19 @@ OUTER: sort.Slice(newSlabs, func(i, j int) bool { return newSlabs[i].Health < newSlabs[j].Health }) - migrateNewMap = nil // free map + } + +OUTER: + for { + // recompute health. + start := time.Now() + if err := b.RefreshHealth(m.ap.shutdownCtx); err != nil { + m.ap.RegisterAlert(m.ap.shutdownCtx, newRefreshHealthFailedAlert(err)) + m.logger.Errorf("failed to recompute cached health before migration: %v", err) + } else { + m.logger.Infof("recomputed slab health in %v", time.Since(start)) + updateToMigrate() + } // log the updated list of slabs to migrate m.logger.Infof("%d slabs to migrate", len(toMigrate))