Skip to content

Commit

Permalink
Only register lost sectors alert if the percentage of sectors lost ex…
Browse files Browse the repository at this point in the history
…ceeds a certain threshold. (#1187)

I noticed a handful of "Host has lost sectors" alerts on my node and
`arequipa`. Since we hint at blocking these hosts, and since these alerts
aren't really all that actionable, I introduced a threshold, as Nate
suggested in the last engineering sync. The threshold is hardcoded to 1% of
the host's stored data.
  • Loading branch information
ChrisSchinnerl authored Apr 23, 2024
2 parents 14706f0 + 1c2d180 commit 712c1d8
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 1 deletion.
11 changes: 11 additions & 0 deletions autopilot/contractor/alerts.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ import (
"go.sia.tech/renterd/api"
)

// alertLostSectorsThresholdPct is the threshold at which the lost sectors
// alert is registered, expressed as a fraction of the host's stored data. A
// value of 0.01 means the alert is registered once the host has lost 1% (or
// more) of its stored data.
const alertLostSectorsThresholdPct = 0.01

var (
alertChurnID = alerts.RandomAlertID() // constant until restarted
alertLostSectorsID = alerts.RandomAlertID() // constant until restarted
Expand Down Expand Up @@ -47,3 +54,7 @@ func newLostSectorsAlert(hk types.PublicKey, lostSectors uint64) alerts.Alert {
Timestamp: time.Now(),
}
}

// registerLostSectorsAlert reports whether the lost sectors alert should be
// registered for a host that lost dataLost out of dataStored (both passed in
// the same unit). It returns false when nothing was lost; otherwise it returns
// true when dataLost is at least alertLostSectorsThresholdPct of dataStored.
// A dataStored of zero with a non-zero dataLost therefore yields true.
func registerLostSectorsAlert(dataLost, dataStored uint64) bool {
	// no loss never triggers the alert, regardless of how much is stored
	if dataLost == 0 {
		return false
	}

	threshold := float64(dataStored) * alertLostSectorsThresholdPct
	return float64(dataLost) >= threshold
}
26 changes: 26 additions & 0 deletions autopilot/contractor/alerts_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package contractor

import (
"testing"

rhpv2 "go.sia.tech/core/rhp/v2"
)

// TestRegisterLostSectorsAlert checks registerLostSectorsAlert against a table
// of lost/stored data combinations, including the cases where nothing was
// lost, where nothing is stored, and the boundary around the 1% threshold.
func TestRegisterLostSectorsAlert(t *testing.T) {
	type testCase struct {
		dataLost   uint64
		dataStored uint64
		expected   bool
	}

	cases := []testCase{
		{dataLost: 0, dataStored: 0, expected: false},
		{dataLost: 0, dataStored: rhpv2.SectorSize, expected: false},
		{dataLost: rhpv2.SectorSize, dataStored: 0, expected: true},
		{dataLost: rhpv2.SectorSize, dataStored: 99 * rhpv2.SectorSize, expected: true},
		{dataLost: rhpv2.SectorSize, dataStored: 100 * rhpv2.SectorSize, expected: true},  // exactly 1%
		{dataLost: rhpv2.SectorSize, dataStored: 101 * rhpv2.SectorSize, expected: false}, // just short of 1%
	}

	for _, tc := range cases {
		result := registerLostSectorsAlert(tc.dataLost, tc.dataStored)
		if result == tc.expected {
			continue
		}
		t.Fatalf("unexpected result for dataLost=%d, dataStored=%d: %v", tc.dataLost, tc.dataStored, result)
	}
}
2 changes: 1 addition & 1 deletion autopilot/contractor/contractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ func (c *Contractor) performContractMaintenance(ctx *mCtx, w Worker) (bool, erro
// check if any used hosts have lost data to warn the user
var toDismiss []types.Hash256
for _, h := range hosts {
if h.Interactions.LostSectors > 0 {
if registerLostSectorsAlert(h.Interactions.LostSectors*rhpv2.SectorSize, h.StoredData) {
c.alerter.RegisterAlert(ctx, newLostSectorsAlert(h.PublicKey, h.Interactions.LostSectors))
} else {
toDismiss = append(toDismiss, alerts.IDForHost(alertLostSectorsID, h.PublicKey))
Expand Down

0 comments on commit 712c1d8

Please sign in to comment.