diff --git a/autopilot/contractor.go b/autopilot/contractor.go index 7b2ea9863..49ba304ae 100644 --- a/autopilot/contractor.go +++ b/autopilot/contractor.go @@ -33,6 +33,10 @@ const ( // contract. estimatedFileContractTransactionSetSize = 2048 + // failedRefreshForgivenessPeriod is the amount of time we wait before + // punishing a contract for not being able to refresh + failedRefreshForgivenessPeriod = 24 * time.Hour + // leewayPctCandidateHosts is the leeway we apply when fetching candidate // hosts, we fetch ~10% more than required leewayPctCandidateHosts = 1.1 @@ -96,6 +100,8 @@ type ( revisionLastBroadcast map[types.FileContractID]time.Time revisionSubmissionBuffer uint64 + firstRefreshFailure map[types.FileContractID]time.Time + mu sync.Mutex pruning bool @@ -162,6 +168,8 @@ func newContractor(ap *Autopilot, revisionSubmissionBuffer uint64, revisionBroad revisionLastBroadcast: make(map[types.FileContractID]time.Time), revisionSubmissionBuffer: revisionSubmissionBuffer, + firstRefreshFailure: make(map[types.FileContractID]time.Time), + resolver: newIPResolver(ap.shutdownCtx, resolverLookupTimeout, ap.logger.Named("resolver")), } } @@ -226,6 +234,9 @@ func (c *contractor) performContractMaintenance(ctx context.Context, w Worker) ( contracts := resp.Contracts c.logger.Infof("fetched %d contracts from the worker, took %v", len(resp.Contracts), time.Since(start)) + // prune contract refresh failure map + c.pruneContractRefreshFailures(contracts) + // run revision broadcast c.runRevisionBroadcast(ctx, w, contracts, isInCurrentSet) @@ -1624,6 +1635,27 @@ func (c *contractor) hostForContract(ctx context.Context, fcid types.FileContrac return } +func (c *contractor) pruneContractRefreshFailures(contracts []api.Contract) { + contractMap := make(map[types.FileContractID]struct{}) + for _, contract := range contracts { + contractMap[contract.ID] = struct{}{} + } + for fcid := range c.firstRefreshFailure { + if _, ok := contractMap[fcid]; !ok { + 
delete(c.firstRefreshFailure, fcid) + } + } +} + +func (c *contractor) shouldForgiveFailedRefresh(fcid types.FileContractID) bool { + lastFailure, exists := c.firstRefreshFailure[fcid] + if !exists { + lastFailure = time.Now() + c.firstRefreshFailure[fcid] = lastFailure + } + return time.Since(lastFailure) < failedRefreshForgivenessPeriod +} + func addLeeway(n uint64, pct float64) uint64 { if pct < 0 { panic("given leeway percent has to be positive") diff --git a/autopilot/contractor_test.go b/autopilot/contractor_test.go index 575605612..9ce54daf5 100644 --- a/autopilot/contractor_test.go +++ b/autopilot/contractor_test.go @@ -3,8 +3,12 @@ package autopilot import ( "math" "testing" + "time" + "go.sia.tech/core/types" + "go.sia.tech/renterd/api" "go.uber.org/zap" + "lukechampine.com/frand" ) func TestCalculateMinScore(t *testing.T) { @@ -35,3 +39,30 @@ func TestCalculateMinScore(t *testing.T) { t.Fatalf("expected minScore to be math.SmallestNonzeroFLoat64 but was %v", minScore) } } + +func TestShouldForgiveFailedRenewal(t *testing.T) { + var fcid types.FileContractID + frand.Read(fcid[:]) + c := &contractor{ + firstRefreshFailure: make(map[types.FileContractID]time.Time), + } + + // try twice since the first time will set the failure time + if !c.shouldForgiveFailedRefresh(fcid) { + t.Fatal("should forgive") + } else if !c.shouldForgiveFailedRefresh(fcid) { + t.Fatal("should forgive") + } + + // set failure to be a full period in the past + c.firstRefreshFailure[fcid] = time.Now().Add(-failedRefreshForgivenessPeriod - time.Second) + if c.shouldForgiveFailedRefresh(fcid) { + t.Fatal("should not forgive") + } + + // prune map + c.pruneContractRefreshFailures([]api.Contract{}) + if len(c.firstRefreshFailure) != 0 { + t.Fatal("expected no failures") + } +} diff --git a/autopilot/hostfilter.go b/autopilot/hostfilter.go index f41a20c94..d64c1f3e3 100644 --- a/autopilot/hostfilter.go +++ b/autopilot/hostfilter.go @@ -254,8 +254,8 @@ func (c *contractor) 
isUsableContract(cfg api.AutopilotConfig, state state, ci c } if isOutOfFunds(cfg, pt, contract) { reasons = append(reasons, errContractOutOfFunds.Error()) - usable = false - recoverable = true + usable = usable && c.shouldForgiveFailedRefresh(contract.ID) + recoverable = !usable // only needs to be recoverable if !usable refresh = true renew = false }