diff --git a/alerts/alerts.go b/alerts/alerts.go index b0d4963c6..f11004dbe 100644 --- a/alerts/alerts.go +++ b/alerts/alerts.go @@ -35,6 +35,7 @@ const ( type ( Alerter interface { + Alerts(_ context.Context, opts AlertsOpts) (resp AlertsResponse, err error) RegisterAlert(_ context.Context, a Alert) error DismissAlerts(_ context.Context, ids ...types.Hash256) error } @@ -169,17 +170,18 @@ func (m *Manager) DismissAlerts(ctx context.Context, ids ...types.Hash256) error }) } -// Active returns the host's active alerts. -func (m *Manager) Active(offset, limit int) AlertsResponse { +// Alerts returns the host's active alerts. +func (m *Manager) Alerts(_ context.Context, opts AlertsOpts) (AlertsResponse, error) { m.mu.Lock() defer m.mu.Unlock() + offset, limit := opts.Offset, opts.Limit resp := AlertsResponse{ Total: len(m.alerts), } if offset >= len(m.alerts) { - return resp + return resp, nil } else if limit == -1 { limit = len(m.alerts) } @@ -197,7 +199,7 @@ func (m *Manager) Active(offset, limit int) AlertsResponse { resp.HasMore = true } resp.Alerts = alerts - return resp + return resp, nil } func (m *Manager) RegisterWebhookBroadcaster(b webhooks.Broadcaster) { @@ -231,6 +233,11 @@ func WithOrigin(alerter Alerter, origin string) Alerter { } } +// Alerts implements the Alerter interface. +func (a *originAlerter) Alerts(ctx context.Context, opts AlertsOpts) (resp AlertsResponse, err error) { + return a.alerter.Alerts(ctx, opts) +} + // RegisterAlert implements the Alerter interface. func (a *originAlerter) RegisterAlert(ctx context.Context, alert Alert) error { if alert.Data == nil { diff --git a/autopilot/alerts.go b/autopilot/alerts.go index 292670dc5..f4762c4d4 100644 --- a/autopilot/alerts.go +++ b/autopilot/alerts.go @@ -14,12 +14,13 @@ import ( ) var ( - alertAccountRefillID = frand.Entropy256() // constant until restarted - alertLostSectorsID = frand.Entropy256() // constant until restarted - alertLowBalanceID = frand.Entropy256() // constant until restarted - alertMigrationID = frand.Entropy256() // constant until restarted - alertPruningID = frand.Entropy256() // constant until restarted - alertRenewalFailedID = frand.Entropy256() // constant until restarted + alertAccountRefillID = randomAlertID() // constant until restarted + alertChurnID = randomAlertID() // constant until restarted + alertLostSectorsID = randomAlertID() // constant until restarted + alertLowBalanceID = randomAlertID() // constant until restarted + alertMigrationID = randomAlertID() // constant until restarted + alertPruningID = randomAlertID() // constant until restarted + alertRenewalFailedID = randomAlertID() // constant until restarted ) func alertIDForAccount(alertID [32]byte, id rhpv3.Account) types.Hash256 { @@ -54,6 +55,20 @@ func (ap *Autopilot) DismissAlert(ctx context.Context, ids ...types.Hash256) { } } +func (ap *Autopilot) HasAlert(ctx context.Context, id types.Hash256) bool { + ar, err := ap.alerts.Alerts(ctx, alerts.AlertsOpts{Offset: 0, Limit: -1}) + if err != nil { + ap.logger.Errorf("failed to fetch alerts: %v", err) + return false + } + for _, alert := range ar.Alerts { + if alert.ID == id { + return true + } + } + return false +} + func newAccountLowBalanceAlert(address types.Address, balance, allowance types.Currency, bh, renewWindow, endHeight uint64) alerts.Alert { severity := alerts.SeverityInfo if bh+renewWindow/2 >= endHeight { @@ -137,37 +152,6 @@ func newContractPruningFailedAlert(hk types.PublicKey, version string, fcid type } } -func newContractSetChangeAlert(name string, additions map[types.FileContractID]contractSetAddition, removals map[types.FileContractID]contractSetRemoval) alerts.Alert { - var hint string - if len(removals) > 0 { - hint = "A high churn rate can lead to a lot of unnecessary migrations, it might be necessary to tweak your configuration depending on the reason hosts are being discarded from the set." - } - - removedReasons := make(map[string]string, len(removals)) - for k, v := range removals { - removedReasons[k.String()] = v.Reason - } - - return alerts.Alert{ - ID: randomAlertID(), - Severity: alerts.SeverityInfo, - Message: "Contract set changed", - Data: map[string]any{ - "name": name, - "set_additions": additions, - "set_removals": removals, - "hint": hint, - - // TODO: these fields can be removed on the next major release, they - // contain redundant information - "added": len(additions), - "removed": len(removals), - "removals": removedReasons, - }, - Timestamp: time.Now(), - } -} - func newLostSectorsAlert(hk types.PublicKey, lostSectors uint64) alerts.Alert { return alerts.Alert{ ID: alertIDForHost(alertLostSectorsID, hk), diff --git a/autopilot/churn.go b/autopilot/churn.go new file mode 100644 index 000000000..70c4651c2 --- /dev/null +++ b/autopilot/churn.go @@ -0,0 +1,63 @@ +package autopilot + +import ( + "time" + + "go.sia.tech/core/types" + "go.sia.tech/renterd/alerts" +) + +type ( + accumulatedChurn struct { + additions map[types.FileContractID][]contractSetAddition + removals map[types.FileContractID][]contractSetRemoval + } +) + +func newAccumulatedChurn() *accumulatedChurn { + return &accumulatedChurn{ + additions: make(map[types.FileContractID][]contractSetAddition), + removals: make(map[types.FileContractID][]contractSetRemoval), + } +} + +func (c *accumulatedChurn) Alert(name string) alerts.Alert { + var hint string + if len(c.removals) > 0 { + hint = "A high churn rate can lead to a lot of unnecessary migrations, it might be necessary to tweak your configuration depending on the reason hosts are being discarded from the set." + } + + removedReasons := make(map[string][]string, len(c.removals)) + for fcid, contractRemovals := range c.removals { + for _, removal := range contractRemovals { + removedReasons[fcid.String()] = append(removedReasons[fcid.String()], removal.Reason) + } + } + + return alerts.Alert{ + ID: alertChurnID, + Severity: alerts.SeverityInfo, + Message: "Contract set changed", + Data: map[string]any{ + "name": name, + "set_additions": c.additions, + "set_removals": c.removals, + "hint": hint, + }, + Timestamp: time.Now(), + } +} + +func (c *accumulatedChurn) Apply(additions map[types.FileContractID]contractSetAddition, removals map[types.FileContractID]contractSetRemoval) { + for fcid, addition := range additions { + c.additions[fcid] = append(c.additions[fcid], addition) + } + for fcid, removal := range removals { + c.removals[fcid] = append(c.removals[fcid], removal) + } +} + +func (c *accumulatedChurn) Reset() { + c.additions = make(map[types.FileContractID][]contractSetAddition) + c.removals = make(map[types.FileContractID][]contractSetRemoval) +} diff --git a/autopilot/contractor.go b/autopilot/contractor.go index 092f2a831..7909277f0 100644 --- a/autopilot/contractor.go +++ b/autopilot/contractor.go @@ -85,6 +85,7 @@ const ( type ( contractor struct { ap *Autopilot + churn *accumulatedChurn resolver *ipResolver logger *zap.SugaredLogger @@ -130,7 +131,7 @@ type ( contractSetRemoval struct { Size uint64 `json:"size"` HostKey types.PublicKey `json:"hostKey"` - Reason string `json:"reason"` + Reason string `json:"reasons"` } renewal struct { @@ -143,6 +144,7 @@ type ( func newContractor(ap *Autopilot, revisionSubmissionBuffer uint64, revisionBroadcastInterval time.Duration) *contractor { return &contractor{ ap: ap, + churn: newAccumulatedChurn(), logger: ap.logger.Named("contractor"), revisionBroadcastInterval: revisionBroadcastInterval, @@ -536,7 +538,11 @@ func (c *contractor) computeContractSetChanged(ctx context.Context, name string, ) hasChanged := len(setAdditions)+len(setRemovals) > 0 if hasChanged { - c.ap.RegisterAlert(ctx, newContractSetChangeAlert(name, setAdditions, setRemovals)) + if !c.ap.HasAlert(ctx, alertChurnID) { + c.churn.Reset() + } + c.churn.Apply(setAdditions, setRemovals) + c.ap.RegisterAlert(ctx, c.churn.Alert(name)) } return hasChanged } diff --git a/bus/bus.go b/bus/bus.go index 9ee6e1ba2..e7e6ddaac 100644 --- a/bus/bus.go +++ b/bus/bus.go @@ -1726,7 +1726,10 @@ func (b *bus) gougingParams(ctx context.Context) (api.GougingParams, error) { } func (b *bus) handleGETAlertsDeprecated(jc jape.Context) { - ar := b.alertMgr.Active(0, -1) + ar, err := b.alertMgr.Alerts(jc.Request.Context(), alerts.AlertsOpts{Offset: 0, Limit: -1}) + if jc.Check("failed to fetch alerts", err) != nil { + return + } jc.Encode(ar.Alerts) } @@ -1744,7 +1747,11 @@ func (b *bus) handleGETAlerts(jc jape.Context) { jc.Error(errors.New("offset must be non-negative"), http.StatusBadRequest) return } - jc.Encode(b.alertMgr.Active(offset, limit)) + ar, err := b.alertMgr.Alerts(jc.Request.Context(), alerts.AlertsOpts{Offset: offset, Limit: limit}) + if jc.Check("failed to fetch alerts", err) != nil { + return + } + jc.Encode(ar) } func (b *bus) handlePOSTAlertsDismiss(jc jape.Context) { diff --git a/bus/client/alerts.go b/bus/client/alerts.go index 7f2bf9aa7..7eceaeaed 100644 --- a/bus/client/alerts.go +++ b/bus/client/alerts.go @@ -10,13 +10,13 @@ import ( ) // Alerts fetches the active alerts from the bus. -func (c *Client) Alerts(opts alerts.AlertsOpts) (resp alerts.AlertsResponse, err error) { +func (c *Client) Alerts(ctx context.Context, opts alerts.AlertsOpts) (resp alerts.AlertsResponse, err error) { values := url.Values{} values.Set("offset", fmt.Sprint(opts.Offset)) if opts.Limit != 0 { values.Set("limit", fmt.Sprint(opts.Limit)) } - err = c.c.GET("/alerts?"+values.Encode(), &resp) + err = c.c.WithContext(ctx).GET("/alerts?"+values.Encode(), &resp) return } diff --git a/internal/testing/cluster_test.go b/internal/testing/cluster_test.go index 4fb62ff31..f30a0906a 100644 --- a/internal/testing/cluster_test.go +++ b/internal/testing/cluster_test.go @@ -1923,7 +1923,7 @@ func TestAlerts(t *testing.T) { tt.OK(b.RegisterAlert(context.Background(), alert)) findAlert := func(id types.Hash256) *alerts.Alert { t.Helper() - ar, err := b.Alerts(alerts.AlertsOpts{}) + ar, err := b.Alerts(context.Background(), alerts.AlertsOpts{}) tt.OK(err) for _, alert := range ar.Alerts { if alert.ID == id { @@ -1960,7 +1960,7 @@ func TestAlerts(t *testing.T) { } // try to find with offset = 1 - ar, err := b.Alerts(alerts.AlertsOpts{Offset: 1}) + ar, err := b.Alerts(context.Background(), alerts.AlertsOpts{Offset: 1}) foundAlerts := ar.Alerts tt.OK(err) if len(foundAlerts) != 1 || foundAlerts[0].ID != alert.ID { @@ -1968,7 +1968,7 @@ func TestAlerts(t *testing.T) { } // try to find with limit = 1 - ar, err = b.Alerts(alerts.AlertsOpts{Limit: 1}) + ar, err = b.Alerts(context.Background(), alerts.AlertsOpts{Limit: 1}) foundAlerts = ar.Alerts tt.OK(err) if len(foundAlerts) != 1 || foundAlerts[0].ID != alert2.ID {