Skip to content

Commit

Permalink
Be more forgiving with contracts that fail to refresh by giving them …
Browse files Browse the repository at this point in the history
…a grace period (#1102)

Addresses one of the two tasks in
#1096
  • Loading branch information
ChrisSchinnerl authored Mar 27, 2024
2 parents 447b3d1 + bcc4591 commit 0e805ac
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 2 deletions.
32 changes: 32 additions & 0 deletions autopilot/contractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ const (
// contract.
estimatedFileContractTransactionSetSize = 2048

// failedRefreshForgivenessPeriod is the amount of time we wait before
// punishing a contract for not being able to refresh
failedRefreshForgivenessPeriod = 24 * time.Hour

// leewayPctCandidateHosts is the leeway we apply when fetching candidate
// hosts, we fetch ~10% more than required
leewayPctCandidateHosts = 1.1
Expand Down Expand Up @@ -96,6 +100,8 @@ type (
revisionLastBroadcast map[types.FileContractID]time.Time
revisionSubmissionBuffer uint64

firstRefreshFailure map[types.FileContractID]time.Time

mu sync.Mutex

pruning bool
Expand Down Expand Up @@ -162,6 +168,8 @@ func newContractor(ap *Autopilot, revisionSubmissionBuffer uint64, revisionBroad
revisionLastBroadcast: make(map[types.FileContractID]time.Time),
revisionSubmissionBuffer: revisionSubmissionBuffer,

firstRefreshFailure: make(map[types.FileContractID]time.Time),

resolver: newIPResolver(ap.shutdownCtx, resolverLookupTimeout, ap.logger.Named("resolver")),
}
}
Expand Down Expand Up @@ -226,6 +234,9 @@ func (c *contractor) performContractMaintenance(ctx context.Context, w Worker) (
contracts := resp.Contracts
c.logger.Infof("fetched %d contracts from the worker, took %v", len(resp.Contracts), time.Since(start))

// prune contract refresh failure map
c.pruneContractRefreshFailures(contracts)

// run revision broadcast
c.runRevisionBroadcast(ctx, w, contracts, isInCurrentSet)

Expand Down Expand Up @@ -1624,6 +1635,27 @@ func (c *contractor) hostForContract(ctx context.Context, fcid types.FileContrac
return
}

// pruneContractRefreshFailures removes every entry from firstRefreshFailure
// whose contract ID is not among the given contracts, so the map only keeps
// failure timestamps for contracts that were passed in.
func (c *contractor) pruneContractRefreshFailures(contracts []api.Contract) {
	// index the IDs of the given contracts for quick membership checks
	known := make(map[types.FileContractID]struct{})
	for i := range contracts {
		known[contracts[i].ID] = struct{}{}
	}

	// forget failures recorded for contracts that are not in the given set
	for id := range c.firstRefreshFailure {
		if _, found := known[id]; found {
			continue
		}
		delete(c.firstRefreshFailure, id)
	}
}

// shouldForgiveFailedRefresh reports whether less than
// failedRefreshForgivenessPeriod has elapsed since the first refresh failure
// recorded for the given contract. If no failure has been recorded for the
// contract yet, the current time is stored as its first failure before the
// check is made.
func (c *contractor) shouldForgiveFailedRefresh(fcid types.FileContractID) bool {
	if firstFailure, seen := c.firstRefreshFailure[fcid]; seen {
		return time.Since(firstFailure) < failedRefreshForgivenessPeriod
	}

	// first failure we see for this contract, remember when it happened
	now := time.Now()
	c.firstRefreshFailure[fcid] = now
	return time.Since(now) < failedRefreshForgivenessPeriod
}

func addLeeway(n uint64, pct float64) uint64 {
if pct < 0 {
panic("given leeway percent has to be positive")
Expand Down
31 changes: 31 additions & 0 deletions autopilot/contractor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@ package autopilot
import (
"math"
"testing"
"time"

"go.sia.tech/core/types"
"go.sia.tech/renterd/api"
"go.uber.org/zap"
"lukechampine.com/frand"
)

func TestCalculateMinScore(t *testing.T) {
Expand Down Expand Up @@ -35,3 +39,30 @@ func TestCalculateMinScore(t *testing.T) {
t.Fatalf("expected minScore to be math.SmallestNonzeroFLoat64 but was %v", minScore)
}
}

// TestShouldForgiveFailedRenewal exercises shouldForgiveFailedRefresh and
// pruneContractRefreshFailures on a contractor that only has its
// firstRefreshFailure map initialised. Note that despite the "Renewal" in its
// name, the helper under test deals with failed refreshes.
func TestShouldForgiveFailedRenewal(t *testing.T) {
	var id types.FileContractID
	frand.Read(id[:])

	c := &contractor{
		firstRefreshFailure: make(map[types.FileContractID]time.Time),
	}

	// the first call records the failure time, the second one reads it back;
	// both must still fall within the forgiveness period
	for attempt := 0; attempt < 2; attempt++ {
		if !c.shouldForgiveFailedRefresh(id) {
			t.Fatal("should forgive")
		}
	}

	// move the recorded failure just beyond one full period into the past
	expired := time.Now().Add(-(failedRefreshForgivenessPeriod + time.Second))
	c.firstRefreshFailure[id] = expired
	if c.shouldForgiveFailedRefresh(id) {
		t.Fatal("should not forgive")
	}

	// pruning against an empty contract list must drop every entry
	c.pruneContractRefreshFailures([]api.Contract{})
	if remaining := len(c.firstRefreshFailure); remaining != 0 {
		t.Fatal("expected no failures")
	}
}
4 changes: 2 additions & 2 deletions autopilot/hostfilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,8 @@ func (c *contractor) isUsableContract(cfg api.AutopilotConfig, state state, ci c
}
if isOutOfFunds(cfg, pt, contract) {
reasons = append(reasons, errContractOutOfFunds.Error())
usable = false
recoverable = true
usable = usable && c.shouldForgiveFailedRefresh(contract.ID)
recoverable = !usable // only needs to be recoverable if !usable
refresh = true
renew = false
}
Expand Down

0 comments on commit 0e805ac

Please sign in to comment.