Skip to content

Commit

Permalink
Update worker constants for pruning (#1278)
Browse files Browse the repository at this point in the history
With this update, Arequipa has been pruning all night without producing
any "EOF" alerts.
That's because the responses are now small enough that the host
doesn't close the connection.

1000 roots per batch is significantly smaller than the 500k we had
before, but the round trip to `hostd` should be fast enough not to
cause too much of a slowdown. It's also a lot better for `hostd`'s db
since it won't have to commit as many changes at once.
  • Loading branch information
ChrisSchinnerl authored Jun 7, 2024
1 parent 4f8c7da commit e4e59b3
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 10 deletions.
8 changes: 5 additions & 3 deletions autopilot/contract_pruning.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func (ap *Autopilot) pruneContract(w Worker, fcid types.FileContractID, hk types
ap.mu.Lock()
defer ap.mu.Unlock()
alertID := alerts.IDForContract(alertPruningID, fcid)
if shouldSendPruneAlert(err, hostVersion) {
if shouldSendPruneAlert(err, hostVersion, hostRelease) {
ap.RegisterAlert(ctx, newContractPruningFailedAlert(hk, hostVersion, hostRelease, fcid, err))
ap.pruningAlertIDs[fcid] = alertID // store id to dismiss stale alerts
} else {
Expand Down Expand Up @@ -236,8 +236,10 @@ func humanReadableSize(b int) string {
float64(b)/float64(div), "KMGTPE"[exp])
}

func shouldSendPruneAlert(err error, version string) bool {
return err != nil && !((utils.IsErr(err, errInvalidMerkleProof) && build.VersionCmp(version, "1.6.0") < 0) ||
func shouldSendPruneAlert(err error, version, release string) bool {
merkleRootIssue := utils.IsErr(err, errInvalidMerkleProof) &&
(build.VersionCmp(version, "1.6.0") < 0 || version == "1.6.0" && release == "")
return err != nil && !(merkleRootIssue ||
utils.IsErr(err, utils.ErrConnectionRefused) ||
utils.IsErr(err, utils.ErrConnectionTimedOut) ||
utils.IsErr(err, utils.ErrConnectionResetByPeer) ||
Expand Down
4 changes: 3 additions & 1 deletion autopilot/contractor/alerts.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,16 @@ func newContractRenewalFailedAlert(contract api.ContractMetadata, interrupted bo
}
}

func newLostSectorsAlert(hk types.PublicKey, lostSectors uint64) alerts.Alert {
func newLostSectorsAlert(hk types.PublicKey, version, release string, lostSectors uint64) alerts.Alert {
return alerts.Alert{
ID: alerts.IDForHost(alertLostSectorsID, hk),
Severity: alerts.SeverityWarning,
Message: "Host has lost sectors",
Data: map[string]interface{}{
"lostSectors": lostSectors,
"hostKey": hk.String(),
"hostVersion": version,
"hostRelease": release,
"hint": "The host has reported that it can't serve at least one sector. Consider blocking this host through the blocklist feature. If you think this was a mistake and you want to ignore this warning for now you can reset the lost sector count",
},
Timestamp: time.Now(),
Expand Down
2 changes: 1 addition & 1 deletion autopilot/contractor/contractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ func (c *Contractor) performContractMaintenance(ctx *mCtx, w Worker) (bool, erro
var toDismiss []types.Hash256
for _, h := range hosts {
if registerLostSectorsAlert(h.Interactions.LostSectors*rhpv2.SectorSize, h.StoredData) {
c.alerter.RegisterAlert(ctx, newLostSectorsAlert(h.PublicKey, h.Interactions.LostSectors))
c.alerter.RegisterAlert(ctx, newLostSectorsAlert(h.PublicKey, h.Settings.Version, h.Settings.Release, h.Interactions.LostSectors))
} else {
toDismiss = append(toDismiss, alerts.IDForHost(alertLostSectorsID, h.PublicKey))
}
Expand Down
6 changes: 3 additions & 3 deletions worker/rhpv2.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ const (
minMessageSize = 4096

// maxMerkleProofResponseSize caps the response message size to a generous
// 32 MiB max length since batchSizeDeleteSectors assumes ~16MiB of
// roots. So we double that to be safe.
maxMerkleProofResponseSize = 8 * 1 << 22 // 32 MiB
// value of 100 MiB worth of roots. To be safe, this is approximately double
// the size of what we have observed on the live network for 5TB+ contracts.
maxMerkleProofResponseSize = 100 * 1 << 20 // 100 MiB
)

var (
Expand Down
4 changes: 2 additions & 2 deletions worker/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ import (
)

const (
batchSizeDeleteSectors = uint64(500000) // ~16MiB of roots
batchSizeFetchSectors = uint64(130000) // ~4MiB of roots
batchSizeDeleteSectors = uint64(1000) // 4GiB of contract data
batchSizeFetchSectors = uint64(25600) // 100GiB of contract data

defaultLockTimeout = time.Minute
defaultRevisionFetchTimeout = 30 * time.Second
Expand Down

0 comments on commit e4e59b3

Please sign in to comment.