From dd121fbc69d91bfbd7471cdfa35d4ac7ae28573b Mon Sep 17 00:00:00 2001 From: Chris Schinnerl Date: Fri, 6 Dec 2024 15:43:42 +0100 Subject: [PATCH 01/14] downloader: move downloadManager to its own package --- .../download/downloadmanager.go | 85 ++++++++++--------- worker/upload_test.go | 3 +- worker/worker.go | 21 ++--- worker/worker_test.go | 10 ++- 4 files changed, 65 insertions(+), 54 deletions(-) rename worker/download.go => internal/download/downloadmanager.go (88%) diff --git a/worker/download.go b/internal/download/downloadmanager.go similarity index 88% rename from worker/download.go rename to internal/download/downloadmanager.go index 6eecf5f06..98a9e6cbc 100644 --- a/worker/download.go +++ b/internal/download/downloadmanager.go @@ -1,4 +1,4 @@ -package worker +package download import ( "bufio" @@ -23,18 +23,26 @@ import ( "go.uber.org/zap" ) +type ObjectStore interface { + DeleteHostSector(ctx context.Context, hk types.PublicKey, root types.Hash256) error + FetchPartialSlab(ctx context.Context, key object.EncryptionKey, offset, length uint32) ([]byte, error) + Slab(ctx context.Context, key object.EncryptionKey) (object.Slab, error) +} + const ( downloadMemoryLimitDenom = 6 // 1/6th of the available download memory can be used by a single download ) var ( - errHostNoLongerUsable = errors.New("host no longer usable") - errDownloadNotEnoughHosts = errors.New("not enough hosts available to download the slab") - errDownloadCancelled = errors.New("download was cancelled") + ErrDownloadCancelled = errors.New("download was cancelled") + ErrDownloadNotEnoughHosts = errors.New("not enough hosts available to download the slab") + ErrShuttingDown = errors.New("download manager is shutting down") + + errHostNoLongerUsable = errors.New("host no longer usable") ) type ( - downloadManager struct { + Manager struct { hm host.HostManager mm memory.MemoryManager os ObjectStore @@ -54,7 +62,7 @@ type ( } slabDownload struct { - mgr *downloadManager + mgr *Manager minShards int offset uint64 @@ -88,12 +96,12 @@ type ( selected int } - downloadManagerStats struct { - avgDownloadSpeedMBPS float64 - avgOverdrivePct float64 - healthyDownloaders uint64 - numDownloaders uint64 - downloadSpeedsMBPS map[types.PublicKey]float64 + Stats struct { + AvgDownloadSpeedMBPS float64 + AvgOverdrivePct float64 + HealthyDownloaders uint64 + NumDownloaders uint64 + DownloadSpeedsMBPS map[types.PublicKey]float64 } ) @@ -107,16 +115,9 @@ func (s *sectorInfo) selectHost(h types.PublicKey) { } } -func (w *Worker) initDownloadManager(uploadKey *utils.UploadKey, maxMemory, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) { - if w.downloadManager != nil { - panic("download manager already initialized") // developer error - } - w.downloadManager = newDownloadManager(w.shutdownCtx, uploadKey, w, w.bus, maxMemory, maxOverdrive, overdriveTimeout, logger) -} - -func newDownloadManager(ctx context.Context, uploadKey *utils.UploadKey, hm host.HostManager, os ObjectStore, maxMemory, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *downloadManager { +func NewManager(ctx context.Context, uploadKey *utils.UploadKey, hm host.HostManager, os ObjectStore, maxMemory, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *Manager { logger = logger.Named("downloadmanager") - return &downloadManager{ + return &Manager{ hm: hm, mm: memory.NewManager(maxMemory, logger), os: os, @@ -135,7 +136,7 @@ func newDownloadManager(ctx context.Context, uploadKey *utils.UploadKey, hm host } 
} -func (mgr *downloadManager) DownloadObject(ctx context.Context, w io.Writer, o object.Object, offset, length uint64, hosts []api.HostInfo) (err error) { +func (mgr *Manager) DownloadObject(ctx context.Context, w io.Writer, o object.Object, offset, length uint64, hosts []api.HostInfo) (err error) { // calculate what slabs we need var ss []slabSlice for _, s := range o.Slabs { @@ -237,7 +238,7 @@ func (mgr *downloadManager) DownloadObject(ctx context.Context, w io.Writer, o o } } if numAvailable < next.MinShards { - responseChan <- &slabDownloadResponse{err: fmt.Errorf("%w: %v/%v", errDownloadNotEnoughHosts, numAvailable, next.MinShards)} + responseChan <- &slabDownloadResponse{err: fmt.Errorf("%w: %v/%v", ErrDownloadNotEnoughHosts, numAvailable, next.MinShards)} return } @@ -277,7 +278,7 @@ outer: case <-mgr.shutdownCtx.Done(): return ErrShuttingDown case <-ctx.Done(): - return errDownloadCancelled + return ErrDownloadCancelled case resp = <-responseChan: } @@ -340,7 +341,7 @@ outer: return nil } -func (mgr *downloadManager) DownloadSlab(ctx context.Context, slab object.Slab, hosts []api.HostInfo) ([][]byte, error) { +func (mgr *Manager) DownloadSlab(ctx context.Context, slab object.Slab, hosts []api.HostInfo) ([][]byte, error) { // refresh the downloaders mgr.refreshDownloaders(hosts) @@ -387,7 +388,11 @@ func (mgr *downloadManager) DownloadSlab(ctx context.Context, slab object.Slab, return shards, err } -func (mgr *downloadManager) Stats() downloadManagerStats { +func (mgr *Manager) MemoryStatus() memory.Status { + return mgr.mm.Status() +} + +func (mgr *Manager) Stats() Stats { mgr.mu.Lock() defer mgr.mu.Unlock() @@ -401,16 +406,16 @@ func (mgr *downloadManager) Stats() downloadManagerStats { } } - return downloadManagerStats{ - avgDownloadSpeedMBPS: mgr.statsSlabDownloadSpeedBytesPerMS.Average() * 0.008, // convert bytes per ms to mbps, - avgOverdrivePct: mgr.statsOverdrivePct.Average(), - healthyDownloaders: numHealthy, - numDownloaders: uint64(len(mgr.downloaders)), - downloadSpeedsMBPS: speeds, + return Stats{ + AvgDownloadSpeedMBPS: mgr.statsSlabDownloadSpeedBytesPerMS.Average() * 0.008, // convert bytes per ms to mbps, + AvgOverdrivePct: mgr.statsOverdrivePct.Average(), + HealthyDownloaders: numHealthy, + NumDownloaders: uint64(len(mgr.downloaders)), + DownloadSpeedsMBPS: speeds, } } -func (mgr *downloadManager) Stop() { +func (mgr *Manager) Stop() { mgr.mu.Lock() defer mgr.mu.Unlock() for _, d := range mgr.downloaders { @@ -418,7 +423,7 @@ func (mgr *downloadManager) Stop() { } } -func (mgr *downloadManager) numDownloaders() int { +func (mgr *Manager) numDownloaders() int { mgr.mu.Lock() defer mgr.mu.Unlock() return len(mgr.downloaders) @@ -427,7 +432,7 @@ func (mgr *downloadManager) numDownloaders() int { // fetchPartialSlab fetches the data of a partial slab from the bus. It will // fall back to ask the bus for the slab metadata in case the slab wasn't found // in the partial slab buffer. -func (mgr *downloadManager) fetchPartialSlab(ctx context.Context, key object.EncryptionKey, offset, length uint32) ([]byte, *object.Slab, error) { +func (mgr *Manager) fetchPartialSlab(ctx context.Context, key object.EncryptionKey, offset, length uint32) ([]byte, *object.Slab, error) { data, err := mgr.os.FetchPartialSlab(ctx, key, offset, length) if utils.IsErr(err, api.ErrObjectNotFound) { // Check if slab was already uploaded. 
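A minimal wiring sketch (not part of this change set) showing how a caller could drive the extracted download.Manager purely through the exported API introduced above (NewManager, DownloadObject, Stop and the exported error values). The function name fetchObject, its dependency parameters, and the memory/overdrive limits below are assumed placeholders, not values taken from this patch.

package example

import (
	"bytes"
	"context"
	"errors"
	"time"

	"go.sia.tech/renterd/api"
	"go.sia.tech/renterd/internal/download"
	"go.sia.tech/renterd/internal/host"
	"go.sia.tech/renterd/internal/utils"
	"go.sia.tech/renterd/object"
	"go.uber.org/zap"
)

// fetchObject downloads a whole object into memory. All dependencies are
// passed in by the caller; the limits used below are example values only.
func fetchObject(ctx context.Context, key *utils.UploadKey, hm host.HostManager, store download.ObjectStore, obj object.Object, size uint64, hosts []api.HostInfo) ([]byte, error) {
	// example limits: 1 GiB of download memory, 3 overdrive workers, 3s overdrive timeout
	mgr := download.NewManager(ctx, key, hm, store, 1<<30, 3, 3*time.Second, zap.NewNop())
	defer mgr.Stop()

	// download the full object into a buffer and map the exported errors
	var buf bytes.Buffer
	err := mgr.DownloadObject(ctx, &buf, obj, 0, size, hosts)
	switch {
	case errors.Is(err, download.ErrDownloadNotEnoughHosts):
		// fewer than MinShards usable hosts were available for a slab
	case errors.Is(err, download.ErrDownloadCancelled):
		// the caller's ctx was cancelled mid-download
	case errors.Is(err, download.ErrShuttingDown):
		// the manager's shutdown context was cancelled
	}
	if err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
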
@@ -442,7 +447,7 @@ func (mgr *downloadManager) fetchPartialSlab(ctx context.Context, key object.Enc return data, nil, nil } -func (mgr *downloadManager) refreshDownloaders(hosts []api.HostInfo) { +func (mgr *Manager) refreshDownloaders(hosts []api.HostInfo) { mgr.mu.Lock() defer mgr.mu.Unlock() @@ -475,7 +480,7 @@ func (mgr *downloadManager) refreshDownloaders(hosts []api.HostInfo) { } } -func (mgr *downloadManager) newSlabDownload(slice object.SlabSlice) *slabDownload { +func (mgr *Manager) newSlabDownload(slice object.SlabSlice) *slabDownload { // calculate the offset and length offset, length := slice.SectorRegion() @@ -508,7 +513,7 @@ func (mgr *downloadManager) newSlabDownload(slice object.SlabSlice) *slabDownloa } } -func (mgr *downloadManager) downloadSlab(ctx context.Context, slice object.SlabSlice) ([][]byte, error) { +func (mgr *Manager) downloadSlab(ctx context.Context, slice object.SlabSlice) ([][]byte, error) { // prepare new download slab := mgr.newSlabDownload(slice) @@ -639,7 +644,7 @@ func (s *slabDownload) nextRequest(ctx context.Context, resps *downloader.Sector // we don't know if the download failed at this point so we register an // error that gets propagated in case it did - s.errs[types.PublicKey{}] = fmt.Errorf("%w: no more hosts", errDownloadNotEnoughHosts) + s.errs[types.PublicKey{}] = fmt.Errorf("%w: no more hosts", ErrDownloadNotEnoughHosts) return nil } @@ -795,7 +800,7 @@ func (s *slabDownload) receive(resp downloader.SectorDownloadResp) (finished boo return s.numCompleted >= s.minShards } -func (mgr *downloadManager) fastest(hosts []types.PublicKey) (fastest *downloader.Downloader) { +func (mgr *Manager) fastest(hosts []types.PublicKey) (fastest *downloader.Downloader) { mgr.mu.Lock() defer mgr.mu.Unlock() lowest := math.MaxFloat64 diff --git a/worker/upload_test.go b/worker/upload_test.go index b1e0d49a3..916e8d146 100644 --- a/worker/upload_test.go +++ b/worker/upload_test.go @@ -11,6 +11,7 @@ import ( rhpv2 "go.sia.tech/core/rhp/v2" "go.sia.tech/core/types" "go.sia.tech/renterd/api" + "go.sia.tech/renterd/internal/download" "go.sia.tech/renterd/internal/test" "go.sia.tech/renterd/object" "lukechampine.com/frand" @@ -115,7 +116,7 @@ func TestUpload(t *testing.T) { // download the data again and assert it fails buf.Reset() err = dl.DownloadObject(context.Background(), &buf, *o.Object, 0, uint64(o.Size), filtered) - if !errors.Is(err, errDownloadNotEnoughHosts) { + if !errors.Is(err, download.ErrDownloadNotEnoughHosts) { t.Fatal("expected not enough hosts error", err) } diff --git a/worker/worker.go b/worker/worker.go index b2e45deaa..ef26b7242 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -24,6 +24,7 @@ import ( "go.sia.tech/renterd/api" "go.sia.tech/renterd/build" "go.sia.tech/renterd/config" + "go.sia.tech/renterd/internal/download" "go.sia.tech/renterd/internal/gouging" "go.sia.tech/renterd/internal/prices" "go.sia.tech/renterd/internal/rhp" @@ -170,7 +171,7 @@ type Worker struct { masterKey utils.MasterKey startTime time.Time - downloadManager *downloadManager + downloadManager *download.Manager uploadManager *uploadManager accounts *iworker.AccountMgr @@ -282,7 +283,7 @@ func (w *Worker) downloadsStatsHandlerGET(jc jape.Context) { // prepare downloaders stats var dss []api.DownloaderStats - for hk, mbps := range stats.downloadSpeedsMBPS { + for hk, mbps := range stats.DownloadSpeedsMBPS { dss = append(dss, api.DownloaderStats{ HostKey: hk, AvgSectorDownloadSpeedMBPS: mbps, @@ -294,10 +295,10 @@ func (w *Worker) downloadsStatsHandlerGET(jc 
jape.Context) { // encode response api.WriteResponse(jc, api.DownloadStatsResponse{ - AvgDownloadSpeedMBPS: math.Ceil(stats.avgDownloadSpeedMBPS*100) / 100, - AvgOverdrivePct: math.Floor(stats.avgOverdrivePct*100*100) / 100, - HealthyDownloaders: stats.healthyDownloaders, - NumDownloaders: stats.numDownloaders, + AvgDownloadSpeedMBPS: math.Ceil(stats.AvgDownloadSpeedMBPS*100) / 100, + AvgOverdrivePct: math.Floor(stats.AvgOverdrivePct*100*100) / 100, + HealthyDownloaders: stats.HealthyDownloaders, + NumDownloaders: stats.NumDownloaders, DownloadersStats: dss, }) } @@ -623,7 +624,7 @@ func (w *Worker) idHandlerGET(jc jape.Context) { func (w *Worker) memoryGET(jc jape.Context) { api.WriteResponse(jc, api.MemoryResponse{ - Download: w.downloadManager.mm.Status(), + Download: w.downloadManager.MemoryStatus(), Upload: w.uploadManager.mm.Status(), }) } @@ -720,7 +721,7 @@ func New(cfg config.Worker, masterKey [32]byte, b Bus, l *zap.Logger) (*Worker, } uploadKey := w.masterKey.DeriveUploadKey() - w.initDownloadManager(&uploadKey, cfg.DownloadMaxMemory, cfg.DownloadMaxOverdrive, cfg.DownloadOverdriveTimeout, l) + w.downloadManager = download.NewManager(w.shutdownCtx, &uploadKey, w, w.bus, cfg.UploadMaxMemory, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) w.initUploadManager(&uploadKey, cfg.UploadMaxMemory, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) w.initContractSpendingRecorder(cfg.BusFlushInterval) @@ -874,8 +875,8 @@ func (w *Worker) GetObject(ctx context.Context, bucket, key string, opts api.Dow err = w.downloadManager.DownloadObject(ctx, wr, obj, uint64(offset), uint64(length), hosts) if err != nil { w.logger.Error(err) - if !errors.Is(err, ErrShuttingDown) && - !errors.Is(err, errDownloadCancelled) && + if !errors.Is(err, download.ErrShuttingDown) && + !errors.Is(err, download.ErrDownloadCancelled) && !errors.Is(err, io.ErrClosedPipe) { w.registerAlert(newDownloadFailedAlert(bucket, key, offset, length, int64(len(hosts)), err)) } diff --git a/worker/worker_test.go b/worker/worker_test.go index 2d605abfe..2f4e5069d 100644 --- a/worker/worker_test.go +++ b/worker/worker_test.go @@ -9,8 +9,10 @@ import ( "go.sia.tech/core/types" "go.sia.tech/renterd/api" "go.sia.tech/renterd/config" + "go.sia.tech/renterd/internal/download" "go.sia.tech/renterd/internal/test" "go.sia.tech/renterd/internal/test/mocks" + "go.sia.tech/renterd/internal/utils" "go.uber.org/zap" "golang.org/x/crypto/blake2b" "lukechampine.com/frand" @@ -44,15 +46,17 @@ func newTestWorker(t test.TestingCommon) *testWorker { ulmm := mocks.NewMemoryManager() // create worker - w, err := New(newTestWorkerCfg(), blake2b.Sum256([]byte("testwork")), b, zap.NewNop()) + cfg := newTestWorkerCfg() + mk := utils.MasterKey(blake2b.Sum256([]byte("testwork"))) + w, err := New(cfg, mk, b, zap.NewNop()) if err != nil { t.Fatal(err) } // override managers hm := newTestHostManager(t) - w.downloadManager.hm = hm - w.downloadManager.mm = dlmm + uploadKey := mk.DeriveUploadKey() + w.downloadManager = download.NewManager(context.Background(), &uploadKey, hm, b, cfg.UploadMaxMemory, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, zap.NewNop()) w.uploadManager.hm = hm w.uploadManager.mm = ulmm From d9fec9fb87dd6854abdcbf54f1eb0339b2eba7ae Mon Sep 17 00:00:00 2001 From: PJ Date: Mon, 9 Dec 2024 14:17:58 +0100 Subject: [PATCH 02/14] internal: move upload manager --- internal/upload/mimereader.go | 15 + internal/upload/uploader/uploader.go | 20 +- internal/upload/uploader/uploader_test.go | 11 +- internal/upload/uploadmanager.go | 959 
++++++++++++++++++++++ internal/upload/uploadmanager_test.go | 65 ++ internal/upload/uploadparams.go | 88 ++ worker/bench_test.go | 48 +- worker/migrations.go | 23 +- worker/upload.go | 949 +-------------------- worker/upload_params.go | 88 -- worker/upload_test.go | 228 ++--- worker/upload_utils.go | 28 - worker/worker.go | 47 +- worker/worker_test.go | 24 +- 14 files changed, 1319 insertions(+), 1274 deletions(-) create mode 100644 internal/upload/mimereader.go create mode 100644 internal/upload/uploadmanager.go create mode 100644 internal/upload/uploadmanager_test.go create mode 100644 internal/upload/uploadparams.go delete mode 100644 worker/upload_params.go delete mode 100644 worker/upload_utils.go diff --git a/internal/upload/mimereader.go b/internal/upload/mimereader.go new file mode 100644 index 000000000..e769a4d7d --- /dev/null +++ b/internal/upload/mimereader.go @@ -0,0 +1,15 @@ +package upload + +import ( + "bytes" + "io" + + "github.com/gabriel-vasile/mimetype" +) + +func NewMimeReader(r io.Reader) (mimeType string, recycled io.Reader, err error) { + buf := bytes.NewBuffer(nil) + mtype, err := mimetype.DetectReader(io.TeeReader(r, buf)) + recycled = io.MultiReader(buf, r) + return mtype.String(), recycled, err +} diff --git a/internal/upload/uploader/uploader.go b/internal/upload/uploader/uploader.go index fff30ddb0..9b79070f6 100644 --- a/internal/upload/uploader/uploader.go +++ b/internal/upload/uploader/uploader.go @@ -95,7 +95,7 @@ type ( } ) -func New(ctx context.Context, cl locking.ContractLocker, cs ContractStore, hm host.HostManager, hi api.HostInfo, cm api.ContractMetadata, l *zap.SugaredLogger) *Uploader { +func New(ctx context.Context, cl locking.ContractLocker, cs ContractStore, hm host.HostManager, hi api.HostInfo, fcid types.FileContractID, endHeight uint64, l *zap.SugaredLogger) *Uploader { return &Uploader{ cl: cl, cs: cs, @@ -103,7 +103,7 @@ func New(ctx context.Context, cl locking.ContractLocker, cs ContractStore, hm ho logger: l, // static - hk: cm.HostKey, + hk: hi.PublicKey, shutdownCtx: ctx, signalNewUpload: make(chan struct{}, 1), @@ -112,9 +112,9 @@ func New(ctx context.Context, cl locking.ContractLocker, cs ContractStore, hm ho statsSectorUploadSpeedBytesPerMS: utils.NewDataPoints(0), // covered by mutex - host: hm.Uploader(hi, cm.ID), - fcid: cm.ID, - endHeight: cm.WindowEnd, + host: hm.Uploader(hi, fcid), + fcid: fcid, + endHeight: endHeight, queue: make([]*SectorUploadReq, 0), } } @@ -145,16 +145,16 @@ func (u *Uploader) PublicKey() types.PublicKey { return u.hk } -func (u *Uploader) Refresh(hi *api.HostInfo, cm api.ContractMetadata) { +func (u *Uploader) Refresh(hi *api.HostInfo, fcid types.FileContractID, endHeight uint64) { u.mu.Lock() defer u.mu.Unlock() if hi != nil { - u.host = u.hm.Uploader(*hi, cm.ID) + u.host = u.hm.Uploader(*hi, fcid) } - u.fcid = cm.ID - u.endHeight = cm.WindowEnd + u.endHeight = endHeight + u.fcid = fcid } func (u *Uploader) Start() { @@ -424,7 +424,7 @@ func (u *Uploader) tryRefresh(ctx context.Context) bool { } // renew the uploader with the renewed contract - u.Refresh(nil, renewed) + u.Refresh(nil, renewed.ID, renewed.WindowEnd) return true } diff --git a/internal/upload/uploader/uploader_test.go b/internal/upload/uploader/uploader_test.go index b5d5f2a64..792406fae 100644 --- a/internal/upload/uploader/uploader_test.go +++ b/internal/upload/uploader/uploader_test.go @@ -18,10 +18,12 @@ import ( func TestUploaderStopped(t *testing.T) { cs := mocks.NewContractStore() hm := mocks.NewHostManager() - c := 
mocks.NewContract(types.PublicKey{1}, types.FileContractID{1}) cl := mocks.NewContractLocker() - ul := New(context.Background(), cl, cs, hm, api.HostInfo{}, c.Metadata(), zap.NewNop().Sugar()) + c := mocks.NewContract(types.PublicKey{1}, types.FileContractID{1}) + md := c.Metadata() + + ul := New(context.Background(), cl, cs, hm, api.HostInfo{}, md.ID, md.WindowEnd, zap.NewNop().Sugar()) ul.Stop(errors.New("test")) req := SectorUploadReq{ @@ -111,11 +113,10 @@ func TestRefreshUploader(t *testing.T) { // create uploader hk := types.PublicKey{1} - c1 := cs.AddContract(hk) - ul := New(context.Background(), cl, cs, hm, api.HostInfo{}, c1.Metadata(), zap.NewNop().Sugar()) + c1 := cs.AddContract(hk).Metadata() + ul := New(context.Background(), cl, cs, hm, api.HostInfo{}, c1.ID, c1.WindowEnd, zap.NewNop().Sugar()) // renew the first contract - fmt.Println(c1.ID()) c1Renewed, err := cs.RenewContract(hk) if err != nil { t.Fatal(err) diff --git a/internal/upload/uploadmanager.go b/internal/upload/uploadmanager.go new file mode 100644 index 000000000..979bebd7f --- /dev/null +++ b/internal/upload/uploadmanager.go @@ -0,0 +1,959 @@ +package upload + +import ( + "context" + "crypto/md5" + "encoding/hex" + "errors" + "fmt" + "io" + "math" + "sort" + "sync" + "time" + + rhpv2 "go.sia.tech/core/rhp/v2" + + "go.sia.tech/core/types" + "go.sia.tech/renterd/api" + "go.sia.tech/renterd/internal/host" + "go.sia.tech/renterd/internal/memory" + "go.sia.tech/renterd/internal/upload/uploader" + "go.sia.tech/renterd/internal/utils" + "go.sia.tech/renterd/object" + "go.uber.org/zap" +) + +var ( + ErrContractExpired = errors.New("contract expired") + ErrNoCandidateUploader = errors.New("no candidate uploader found") + ErrShuttingDown = errors.New("upload manager is shutting down") + ErrUploadCancelled = errors.New("upload was cancelled") + ErrUploadNotEnoughHosts = errors.New("not enough hosts to support requested upload redundancy") +) + +type ( + ContractLocker interface { + AcquireContract(ctx context.Context, fcid types.FileContractID, priority int, d time.Duration) (lockID uint64, err error) + KeepaliveContract(ctx context.Context, fcid types.FileContractID, lockID uint64, d time.Duration) (err error) + ReleaseContract(ctx context.Context, fcid types.FileContractID, lockID uint64) (err error) + } + + ObjectStore interface { + AddMultipartPart(ctx context.Context, bucket, key, ETag, uploadID string, partNumber int, slices []object.SlabSlice) (err error) + AddObject(ctx context.Context, bucket, key string, o object.Object, opts api.AddObjectOptions) error + AddPartialSlab(ctx context.Context, data []byte, minShards, totalShards uint8) (slabs []object.SlabSlice, slabBufferMaxSizeSoftReached bool, err error) + AddUploadingSectors(ctx context.Context, uID api.UploadID, root []types.Hash256) error + FinishUpload(ctx context.Context, uID api.UploadID) error + MarkPackedSlabsUploaded(ctx context.Context, slabs []api.UploadedPackedSlab) error + Objects(ctx context.Context, prefix string, opts api.ListObjectOptions) (resp api.ObjectsResponse, err error) + TrackUpload(ctx context.Context, uID api.UploadID) error + UpdateSlab(ctx context.Context, key object.EncryptionKey, sectors []api.UploadedSector) error + } +) + +type ( + HostInfo struct { + api.HostInfo + + // contract info + ContractEndHeight uint64 + ContractID types.FileContractID + ContractRenewedFrom types.FileContractID + } + + Manager struct { + hm host.HostManager + mm memory.MemoryManager + os ObjectStore + cl ContractLocker + cs uploader.ContractStore + 
uploadKey *utils.UploadKey + logger *zap.SugaredLogger + + maxOverdrive uint64 + overdriveTimeout time.Duration + + statsOverdrivePct *utils.DataPoints + statsSlabUploadSpeedBytesPerMS *utils.DataPoints + + shutdownCtx context.Context + + mu sync.Mutex + uploaders []*uploader.Uploader + } + + Stats struct { + AvgSlabUploadSpeedMBPS float64 + AvgOverdrivePct float64 + HealthyUploaders uint64 + NumUploaders uint64 + UploadSpeedsMBPS map[types.PublicKey]float64 + } +) + +type ( + upload struct { + id api.UploadID + allowed map[types.PublicKey]struct{} + os ObjectStore + shutdownCtx context.Context + } + + uploadedSector struct { + hk types.PublicKey + fcid types.FileContractID + root types.Hash256 + } + + slabUpload struct { + uploadID api.UploadID + + maxOverdrive uint64 + lastOverdrive time.Time + + sectors []*sectorUpload + candidates []*candidate // sorted by upload estimate + + numLaunched uint64 + numInflight uint64 + numOverdriving uint64 + numUploaded uint64 + numSectors uint64 + + mem memory.Memory + + errs utils.HostErrorSet + } + + candidate struct { + uploader *uploader.Uploader + req *uploader.SectorUploadReq + } + + slabUploadResponse struct { + slab object.SlabSlice + index int + err error + } + + sectorUpload struct { + index int + root types.Hash256 + + ctx context.Context + cancel context.CancelCauseFunc + + mu sync.Mutex + uploaded uploadedSector + data *[rhpv2.SectorSize]byte + } +) + +func NewManager(ctx context.Context, uploadKey *utils.UploadKey, hm host.HostManager, os ObjectStore, cl ContractLocker, cs uploader.ContractStore, maxMemory, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *Manager { + logger = logger.Named("uploadmanager") + return &Manager{ + hm: hm, + mm: memory.NewManager(maxMemory, logger), + os: os, + cl: cl, + cs: cs, + uploadKey: uploadKey, + logger: logger.Sugar(), + + maxOverdrive: maxOverdrive, + overdriveTimeout: overdriveTimeout, + + statsOverdrivePct: utils.NewDataPoints(0), + statsSlabUploadSpeedBytesPerMS: utils.NewDataPoints(0), + + shutdownCtx: ctx, + + uploaders: make([]*uploader.Uploader, 0), + } +} + +func (mgr *Manager) AcquireMemory(ctx context.Context, amt uint64) memory.Memory { + return mgr.mm.AcquireMemory(ctx, amt) +} + +func (mgr *Manager) MemoryStatus() memory.Status { + return mgr.mm.Status() +} + +func (mgr *Manager) Stats() Stats { + mgr.mu.Lock() + defer mgr.mu.Unlock() + + var numHealthy uint64 + speeds := make(map[types.PublicKey]float64) + for _, u := range mgr.uploaders { + speeds[u.PublicKey()] = u.AvgUploadSpeedBytesPerMS() * 0.008 + if u.Healthy() { + numHealthy++ + } + } + + // prepare stats + return Stats{ + AvgSlabUploadSpeedMBPS: mgr.statsSlabUploadSpeedBytesPerMS.Average() * 0.008, // convert bytes per ms to mbps, + AvgOverdrivePct: mgr.statsOverdrivePct.Average(), + HealthyUploaders: numHealthy, + NumUploaders: uint64(len(speeds)), + UploadSpeedsMBPS: speeds, + } +} + +func (mgr *Manager) Stop() { + mgr.mu.Lock() + defer mgr.mu.Unlock() + for _, u := range mgr.uploaders { + u.Stop(ErrShuttingDown) + } +} + +func (mgr *Manager) Upload(ctx context.Context, r io.Reader, hosts []HostInfo, up Parameters) (bufferSizeLimitReached bool, eTag string, err error) { + // cancel all in-flight requests when the upload is done + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // create the object + o := object.NewObject(up.EC) + + // create the md5 hasher for the etag + // NOTE: we use md5 since it's s3 compatible and clients expect it to be md5 + hasher := md5.New() + r = io.TeeReader(r, 
hasher) + + // create the cipher reader + cr, err := o.Encrypt(r, object.EncryptionOptions{ + Offset: up.EncryptionOffset, + Key: mgr.uploadKey, + }) + if err != nil { + return false, "", err + } + + // create the upload + upload, err := mgr.newUpload(up.RS.TotalShards, hosts, up.BH) + if err != nil { + return false, "", err + } + + // track the upload in the bus + if err := mgr.os.TrackUpload(ctx, upload.id); err != nil { + return false, "", fmt.Errorf("failed to track upload '%v', err: %w", upload.id, err) + } + + // defer a function that finishes the upload + defer func() { + ctx, cancel := context.WithTimeout(mgr.shutdownCtx, time.Minute) + if err := mgr.os.FinishUpload(ctx, upload.id); err != nil && !errors.Is(err, context.Canceled) { + mgr.logger.Errorf("failed to mark upload %v as finished: %v", upload.id, err) + } + cancel() + }() + + // create the response channel + respChan := make(chan slabUploadResponse) + + // channel to notify main thread of the number of slabs to wait for + numSlabsChan := make(chan int, 1) + + // prepare slab sizes + slabSizeNoRedundancy := up.RS.SlabSizeNoRedundancy() + slabSize := up.RS.SlabSize() + var partialSlab []byte + + // launch uploads in a separate goroutine + go func() { + var slabIndex int + for { + select { + case <-mgr.shutdownCtx.Done(): + return // interrupted + case <-ctx.Done(): + return // interrupted + default: + } + // acquire memory + mem := mgr.mm.AcquireMemory(ctx, slabSize) + if mem == nil { + return // interrupted + } + + // read next slab's data + data := make([]byte, slabSizeNoRedundancy) + length, err := io.ReadFull(io.LimitReader(cr, int64(slabSizeNoRedundancy)), data) + if err == io.EOF { + mem.Release() + + // no more data to upload, notify main thread of the number of + // slabs to wait for + numSlabs := slabIndex + if partialSlab != nil && slabIndex > 0 { + numSlabs-- // don't wait on partial slab + } + numSlabsChan <- numSlabs + return + } else if err != nil && err != io.ErrUnexpectedEOF { + mem.Release() + + // unexpected error, notify main thread + select { + case respChan <- slabUploadResponse{err: err}: + case <-ctx.Done(): + } + return + } else if up.Packing && errors.Is(err, io.ErrUnexpectedEOF) { + mem.Release() + + // uploadPacking is true, we return the partial slab without + // uploading. 
+ partialSlab = data[:length] + } else { + // regular upload + go func(rs api.RedundancySettings, data []byte, length, slabIndex int) { + uploadSpeed, overdrivePct := upload.uploadSlab(ctx, rs, data, length, slabIndex, respChan, mgr.candidates(upload.allowed), mem, mgr.maxOverdrive, mgr.overdriveTimeout) + + // track stats + mgr.statsSlabUploadSpeedBytesPerMS.Track(float64(uploadSpeed)) + mgr.statsOverdrivePct.Track(overdrivePct) + + // release memory + mem.Release() + }(up.RS, data, length, slabIndex) + } + + slabIndex++ + } + }() + + // collect responses + var responses []slabUploadResponse + numSlabs := math.MaxInt32 + for len(responses) < numSlabs { + select { + case <-mgr.shutdownCtx.Done(): + return false, "", ErrShuttingDown + case <-ctx.Done(): + return false, "", ErrUploadCancelled + case numSlabs = <-numSlabsChan: + case res := <-respChan: + if res.err != nil { + return false, "", res.err + } + responses = append(responses, res) + } + } + + // sort the slabs by index + sort.Slice(responses, func(i, j int) bool { + return responses[i].index < responses[j].index + }) + + // decorate the object with the slabs + for _, resp := range responses { + o.Slabs = append(o.Slabs, resp.slab) + } + + // compute etag + eTag = hex.EncodeToString(hasher.Sum(nil)) + + // add partial slabs + if len(partialSlab) > 0 { + var pss []object.SlabSlice + pss, bufferSizeLimitReached, err = mgr.os.AddPartialSlab(ctx, partialSlab, uint8(up.RS.MinShards), uint8(up.RS.TotalShards)) + if err != nil { + return false, "", err + } + o.Slabs = append(o.Slabs, pss...) + } + + if up.Multipart { + // persist the part + err = mgr.os.AddMultipartPart(ctx, up.Bucket, up.Key, eTag, up.UploadID, up.PartNumber, o.Slabs) + if err != nil { + return bufferSizeLimitReached, "", fmt.Errorf("couldn't add multi part: %w", err) + } + } else { + // persist the object + err = mgr.os.AddObject(ctx, up.Bucket, up.Key, o, api.AddObjectOptions{MimeType: up.MimeType, ETag: eTag, Metadata: up.Metadata}) + if err != nil { + return bufferSizeLimitReached, "", fmt.Errorf("couldn't add object: %w", err) + } + } + + return +} + +func (mgr *Manager) UploadPackedSlab(ctx context.Context, rs api.RedundancySettings, ps api.PackedSlab, mem memory.Memory, hosts []HostInfo, bh uint64) (err error) { + // cancel all in-flight requests when the upload is done + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // build the shards + shards := encryptPartialSlab(ps.Data, ps.EncryptionKey, uint8(rs.MinShards), uint8(rs.TotalShards)) + + // create the upload + upload, err := mgr.newUpload(len(shards), hosts, bh) + if err != nil { + return err + } + + // track the upload in the bus + if err := mgr.os.TrackUpload(ctx, upload.id); err != nil { + return fmt.Errorf("failed to track upload '%v', err: %w", upload.id, err) + } + + // defer a function that finishes the upload + defer func() { + ctx, cancel := context.WithTimeout(mgr.shutdownCtx, time.Minute) + if err := mgr.os.FinishUpload(ctx, upload.id); err != nil { + mgr.logger.Errorf("failed to mark upload %v as finished: %v", upload.id, err) + } + cancel() + }() + + // upload the shards + uploaded, uploadSpeed, overdrivePct, err := upload.uploadShards(ctx, shards, mgr.candidates(upload.allowed), mem, mgr.maxOverdrive, mgr.overdriveTimeout) + if err != nil { + return err + } + + // build sectors + var sectors []api.UploadedSector + for _, sector := range uploaded { + sectors = append(sectors, api.UploadedSector{ + ContractID: sector.fcid, + Root: sector.root, + }) + } + + // track stats + 
mgr.statsSlabUploadSpeedBytesPerMS.Track(float64(uploadSpeed)) + mgr.statsOverdrivePct.Track(overdrivePct) + + // mark packed slab as uploaded + slab := api.UploadedPackedSlab{BufferID: ps.BufferID, Shards: sectors} + err = mgr.os.MarkPackedSlabsUploaded(ctx, []api.UploadedPackedSlab{slab}) + if err != nil { + return fmt.Errorf("couldn't mark packed slabs uploaded, err: %v", err) + } + + return nil +} + +func (mgr *Manager) UploadShards(ctx context.Context, s object.Slab, shardIndices []int, shards [][]byte, hosts []HostInfo, bh uint64, mem memory.Memory) (err error) { + // cancel all in-flight requests when the upload is done + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // create the upload + upload, err := mgr.newUpload(len(shards), hosts, bh) + if err != nil { + return err + } + + // track the upload in the bus + if err := mgr.os.TrackUpload(ctx, upload.id); err != nil { + return fmt.Errorf("failed to track upload '%v', err: %w", upload.id, err) + } + + // defer a function that finishes the upload + defer func() { + ctx, cancel := context.WithTimeout(mgr.shutdownCtx, time.Minute) + if err := mgr.os.FinishUpload(ctx, upload.id); err != nil { + mgr.logger.Errorf("failed to mark upload %v as finished: %v", upload.id, err) + } + cancel() + }() + + // upload the shards + uploaded, uploadSpeed, overdrivePct, err := upload.uploadShards(ctx, shards, mgr.candidates(upload.allowed), mem, mgr.maxOverdrive, mgr.overdriveTimeout) + if err != nil { + return err + } + + // build sectors + var sectors []api.UploadedSector + for _, sector := range uploaded { + sectors = append(sectors, api.UploadedSector{ + ContractID: sector.fcid, + Root: sector.root, + }) + } + + // track stats + mgr.statsOverdrivePct.Track(overdrivePct) + mgr.statsSlabUploadSpeedBytesPerMS.Track(float64(uploadSpeed)) + + // update the slab + return mgr.os.UpdateSlab(ctx, s.EncryptionKey, sectors) +} + +func (mgr *Manager) candidates(allowed map[types.PublicKey]struct{}) (candidates []*uploader.Uploader) { + mgr.mu.Lock() + defer mgr.mu.Unlock() + + for _, u := range mgr.uploaders { + if _, allowed := allowed[u.PublicKey()]; allowed { + candidates = append(candidates, u) + } + } + + // sort candidates by upload estimate + sort.Slice(candidates, func(i, j int) bool { + return candidates[i].Estimate() < candidates[j].Estimate() + }) + return +} + +func (mgr *Manager) newUpload(totalShards int, hosts []HostInfo, bh uint64) (*upload, error) { + mgr.mu.Lock() + defer mgr.mu.Unlock() + + // refresh the uploaders + mgr.refreshUploaders(hosts, bh) + + // check if we have enough contracts + if len(hosts) < totalShards { + return nil, fmt.Errorf("%v < %v: %w", len(hosts), totalShards, ErrUploadNotEnoughHosts) + } + + // create allowed map + allowed := make(map[types.PublicKey]struct{}) + for _, h := range hosts { + allowed[h.PublicKey] = struct{}{} + } + + // create upload + return &upload{ + id: api.NewUploadID(), + allowed: allowed, + os: mgr.os, + shutdownCtx: mgr.shutdownCtx, + }, nil +} + +func (mgr *Manager) refreshUploaders(hosts []HostInfo, bh uint64) { + // build table to lookup lookup + lookup := make(map[types.FileContractID]HostInfo) + for _, h := range hosts { + if h.ContractRenewedFrom != (types.FileContractID{}) { + lookup[h.ContractRenewedFrom] = h + } + } + + // refresh uploaders + var refreshed []*uploader.Uploader + existing := make(map[types.FileContractID]struct{}) + for _, uploader := range mgr.uploaders { + // refresh uploaders that got renewed + if renewal, renewed := lookup[uploader.ContractID()]; 
renewed { + uploader.Refresh(&renewal.HostInfo, renewal.ContractID, renewal.ContractEndHeight) + } + + // stop uploaders that expired + if uploader.Expired(bh) { + uploader.Stop(ErrContractExpired) + continue + } + + // recompute the stats + uploader.TryRecomputeStats() + + // add to the list + refreshed = append(refreshed, uploader) + existing[uploader.ContractID()] = struct{}{} + } + + // add missing uploaders + for _, h := range hosts { + if _, exists := existing[h.ContractID]; !exists && bh < h.ContractEndHeight { + uploader := uploader.New(mgr.shutdownCtx, mgr.cl, mgr.cs, mgr.hm, h.HostInfo, h.ContractID, h.ContractEndHeight, mgr.logger) + refreshed = append(refreshed, uploader) + go uploader.Start() + } + } + + mgr.uploaders = refreshed + return +} + +func (u *upload) newSlabUpload(ctx context.Context, shards [][]byte, uploaders []*uploader.Uploader, mem memory.Memory, maxOverdrive uint64) (*slabUpload, chan uploader.SectorUploadResp) { + // prepare response channel + responseChan := make(chan uploader.SectorUploadResp) + + // prepare sectors + var wg sync.WaitGroup + sectors := make([]*sectorUpload, len(shards)) + for sI := range shards { + wg.Add(1) + go func(idx int) { + // create the ctx + sCtx, sCancel := context.WithCancelCause(ctx) + + // create the sector + // NOTE: we are computing the sector root here and pass it all the + // way down to the RPC to avoid having to recompute it for the proof + // verification. This is necessary because we need it ahead of time + // for the call to AddUploadingSector in upload.go + // Once we upload to temp storage we don't need AddUploadingSector + // anymore and can move it back to the RPC. + sectors[idx] = §orUpload{ + data: (*[rhpv2.SectorSize]byte)(shards[idx]), + index: idx, + root: rhpv2.SectorRoot((*[rhpv2.SectorSize]byte)(shards[idx])), + ctx: sCtx, + cancel: sCancel, + } + wg.Done() + }(sI) + } + wg.Wait() + + // prepare candidates + candidates := make([]*candidate, len(uploaders)) + for i, uploader := range uploaders { + candidates[i] = &candidate{uploader: uploader} + } + + // create slab upload + return &slabUpload{ + uploadID: u.id, + + maxOverdrive: maxOverdrive, + mem: mem, + + sectors: sectors, + candidates: candidates, + numSectors: uint64(len(shards)), + + errs: make(utils.HostErrorSet), + }, responseChan +} + +func (u *upload) uploadSlab(ctx context.Context, rs api.RedundancySettings, data []byte, length, index int, respChan chan slabUploadResponse, candidates []*uploader.Uploader, mem memory.Memory, maxOverdrive uint64, overdriveTimeout time.Duration) (int64, float64) { + // create the response + resp := slabUploadResponse{ + slab: object.SlabSlice{ + Slab: object.NewSlab(uint8(rs.MinShards)), + Offset: 0, + Length: uint32(length), + }, + index: index, + } + + // create the shards + shards := make([][]byte, rs.TotalShards) + resp.slab.Slab.Encode(data, shards) + resp.slab.Slab.Encrypt(shards) + + // upload the shards + uploaded, uploadSpeed, overdrivePct, err := u.uploadShards(ctx, shards, candidates, mem, maxOverdrive, overdriveTimeout) + + // build the sectors + var sectors []object.Sector + for _, sector := range uploaded { + sectors = append(sectors, sector.toObjectSector()) + } + + // decorate the response + resp.err = err + resp.slab.Shards = sectors + + // send the response + select { + case <-ctx.Done(): + case respChan <- resp: + } + + return uploadSpeed, overdrivePct +} + +func (u *upload) uploadShards(ctx context.Context, shards [][]byte, candidates []*uploader.Uploader, mem memory.Memory, maxOverdrive uint64, 
overdriveTimeout time.Duration) (sectors []uploadedSector, uploadSpeed int64, overdrivePct float64, err error) { + // ensure inflight uploads get cancelled + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + // prepare the upload + slab, respChan := u.newSlabUpload(ctx, shards, candidates, mem, maxOverdrive) + + // prepare requests + requests := make([]*uploader.SectorUploadReq, len(shards)) + roots := make([]types.Hash256, len(shards)) + for sI := range shards { + s := slab.sectors[sI] + requests[sI] = uploader.NewUploadRequest(s.ctx, s.data, sI, respChan, s.root, false) + roots[sI] = slab.sectors[sI].root + } + + // notify bus about roots + if err := u.os.AddUploadingSectors(ctx, u.id, roots); err != nil { + return nil, 0, 0, fmt.Errorf("failed to add sector to uploading sectors: %w", err) + } + + // launch all requests + for _, upload := range requests { + if err := slab.launch(upload); err != nil { + return nil, 0, 0, err + } + } + + // create an overdrive timer + if overdriveTimeout == 0 { + overdriveTimeout = time.Duration(math.MaxInt64) + } + timer := time.NewTimer(overdriveTimeout) + + // create a request buffer + var buffer []*uploader.SectorUploadReq + + // start the timer after the upload has started + // newSlabUpload is quite slow due to computing the sector roots + start := time.Now() + + // collect responses + var used bool + var done bool +loop: + for slab.numInflight > 0 && !done { + select { + case <-u.shutdownCtx.Done(): + return nil, 0, 0, ErrShuttingDown + case <-ctx.Done(): + return nil, 0, 0, context.Cause(ctx) + case resp := <-respChan: + // receive the response + used, done = slab.receive(resp) + if done { + break loop + } + + // relaunch non-overdrive uploads + if resp.Err != nil && !resp.Req.Overdrive { + if err := slab.launch(resp.Req); err != nil { + // a failure to relaunch non-overdrive uploads is bad, but + // we need to keep them around because an overdrive upload + // might've been redundant, in which case we can re-use the + // host to launch this request + buffer = append(buffer, resp.Req) + } + } else if resp.Err == nil && !used { + if len(buffer) > 0 { + // relaunch buffered upload request + if err := slab.launch(buffer[0]); err == nil { + buffer = buffer[1:] + } + } else if slab.canOverdrive(overdriveTimeout) { + // or try overdriving a sector + _ = slab.launch(slab.nextRequest(respChan)) + } + } + case <-timer.C: + // try overdriving a sector + if slab.canOverdrive(overdriveTimeout) { + _ = slab.launch(slab.nextRequest(respChan)) // ignore result + } + } + + // reset the overdrive timer + if overdriveTimeout != math.MaxInt64 { + if !timer.Stop() { + select { + case <-timer.C: + default: + } + } + timer.Reset(overdriveTimeout) + } + } + + // calculate the upload speed + bytes := slab.numUploaded * rhpv2.SectorSize + ms := time.Since(start).Milliseconds() + if ms == 0 { + ms = 1 + } + uploadSpeed = int64(bytes) / ms + + // calculate overdrive pct + var numOverdrive uint64 + if slab.numLaunched > slab.numSectors { + numOverdrive = slab.numLaunched - slab.numSectors + } + overdrivePct = float64(numOverdrive) / float64(slab.numSectors) + + if slab.numUploaded < slab.numSectors { + remaining := slab.numSectors - slab.numUploaded + err = fmt.Errorf("failed to upload slab: launched=%d uploaded=%d remaining=%d inflight=%d pending=%d uploaders=%d errors=%d %w", slab.numLaunched, slab.numUploaded, remaining, slab.numInflight, len(buffer), len(slab.candidates), len(slab.errs), slab.errs) + return + } + + // collect the sectors + for _, sector := range 
slab.sectors { + sectors = append(sectors, sector.uploaded) + } + return +} + +func (s *slabUpload) canOverdrive(overdriveTimeout time.Duration) bool { + // overdrive is not kicking in yet + remaining := s.numSectors - s.numUploaded + if remaining > s.maxOverdrive { + return false + } + + // overdrive is not due yet + if time.Since(s.lastOverdrive) < overdriveTimeout { + return false + } + + // overdrive is maxed out + if s.numInflight-remaining >= s.maxOverdrive { + return false + } + + return true +} + +func (s *slabUpload) launch(req *uploader.SectorUploadReq) error { + // nothing to do + if req == nil { + return nil + } + + // find candidate + var candidate *candidate + for _, c := range s.candidates { + if c.req != nil { + continue + } + candidate = c + break + } + + // no candidate found + if candidate == nil { + return ErrNoCandidateUploader + } + + // update the candidate + candidate.req = req + if req.Overdrive { + s.lastOverdrive = time.Now() + s.numOverdriving++ + } + // update the state + s.numInflight++ + s.numLaunched++ + + // enqueue the req + candidate.uploader.Enqueue(req) + return nil +} + +func (s *slabUpload) nextRequest(responseChan chan uploader.SectorUploadResp) *uploader.SectorUploadReq { + // count overdrives + overdriveCnts := make(map[int]int) + for _, c := range s.candidates { + if c.req != nil && c.req.Overdrive { + overdriveCnts[c.req.Idx]++ + } + } + + // overdrive the sector with the least amount of overdrives + lowestNumOverdrives := math.MaxInt + var nextSector *sectorUpload + for sI, sector := range s.sectors { + if !sector.isUploaded() && overdriveCnts[sI] < lowestNumOverdrives { + lowestNumOverdrives = overdriveCnts[sI] + nextSector = sector + } + } + if nextSector == nil { + return nil + } + + return uploader.NewUploadRequest(nextSector.ctx, nextSector.data, nextSector.index, responseChan, nextSector.root, true) +} + +func (s *slabUpload) receive(resp uploader.SectorUploadResp) (bool, bool) { + // convenience variable + req := resp.Req + sector := s.sectors[req.Idx] + + // update the state + if req.Overdrive { + s.numOverdriving-- + } + s.numInflight-- + + // redundant sectors can't complete the upload + if sector.isUploaded() { + // release the candidate + for _, candidate := range s.candidates { + if candidate.req == req { + candidate.req = nil + break + } + } + return false, false + } + + // failed reqs can't complete the upload, we do this after the isUploaded + // check since any error returned for a redundant sector is probably a + // result of the sector ctx being closed + if resp.Err != nil { + s.errs[resp.HK] = resp.Err + return false, false + } + + // store the sector + sector.finish(resp) + + // update uploaded sectors + s.numUploaded++ + + // release memory + s.mem.ReleaseSome(rhpv2.SectorSize) + + return true, s.numUploaded == s.numSectors +} + +func (s *sectorUpload) finish(resp uploader.SectorUploadResp) { + s.mu.Lock() + defer s.mu.Unlock() + + s.cancel(uploader.ErrSectorUploadFinished) + s.uploaded = uploadedSector{ + hk: resp.HK, + fcid: resp.FCID, + root: resp.Req.Root, + } + s.data = nil +} + +func (s *sectorUpload) isUploaded() bool { + return s.uploaded.root != (types.Hash256{}) +} + +func (us uploadedSector) toObjectSector() object.Sector { + return object.Sector{ + Contracts: map[types.PublicKey][]types.FileContractID{us.hk: {us.fcid}}, + Root: us.root, + } +} + +func encryptPartialSlab(data []byte, key object.EncryptionKey, minShards, totalShards uint8) [][]byte { + slab := object.Slab{ + EncryptionKey: key, + MinShards: 
minShards, + Shards: make([]object.Sector, totalShards), + } + encodedShards := make([][]byte, totalShards) + slab.Encode(data, encodedShards) + slab.Encrypt(encodedShards) + return encodedShards +} diff --git a/internal/upload/uploadmanager_test.go b/internal/upload/uploadmanager_test.go new file mode 100644 index 000000000..cf6932df2 --- /dev/null +++ b/internal/upload/uploadmanager_test.go @@ -0,0 +1,65 @@ +package upload + +import ( + "context" + "testing" + + "go.sia.tech/core/types" + "go.sia.tech/renterd/api" + "go.sia.tech/renterd/internal/host" + "go.uber.org/zap" +) + +type hostManager struct{} + +func (hm *hostManager) Downloader(hi api.HostInfo) host.Downloader { return nil } +func (hm *hostManager) Uploader(hi api.HostInfo, fcid types.FileContractID) host.Uploader { return nil } +func (hm *hostManager) Host(hk types.PublicKey, fcid types.FileContractID, siamuxAddr string) host.Host { + return nil +} + +func TestRefreshUploaders(t *testing.T) { + hm := &hostManager{} + ul := NewManager(context.Background(), nil, hm, nil, nil, nil, 0, 0, 0, zap.NewNop()) + + // prepare host info + hi := HostInfo{ + HostInfo: api.HostInfo{PublicKey: types.PublicKey{1}}, + ContractEndHeight: 1, + ContractID: types.FileContractID{1}, + ContractRenewedFrom: types.FileContractID{}, + } + + // refresh uploaders & assert it got added + ul.refreshUploaders([]HostInfo{hi}, 0) + if len(ul.uploaders) != 1 { + t.Fatalf("unexpected number of uploaders, %v != 1", len(ul.uploaders)) + } + + // update contract + hi.ContractRenewedFrom = hi.ContractID + hi.ContractID = types.FileContractID{2} + hi.ContractEndHeight = 10 + + // assert we still have one + ul.refreshUploaders([]HostInfo{hi}, 0) + if len(ul.uploaders) != 1 { + t.Fatalf("unexpected number of uploaders, %v != 1", len(ul.uploaders)) + } + ull := ul.uploaders[0] + if ull.ContractID() != hi.ContractID { + t.Fatalf("unexpected contract id, %v != %v", ull.ContractID(), hi.ContractID) + } + + // refresh right before expiry + ul.refreshUploaders([]HostInfo{hi}, 9) + if len(ul.uploaders) != 1 { + t.Fatalf("unexpected number of uploaders, %v != 1", len(ul.uploaders)) + } + + // refresh at expiry height + ul.refreshUploaders([]HostInfo{hi}, 10) + if len(ul.uploaders) != 0 { + t.Fatalf("unexpected number of uploaders, %v != 0", len(ul.uploaders)) + } +} diff --git a/internal/upload/uploadparams.go b/internal/upload/uploadparams.go new file mode 100644 index 000000000..bf8983c80 --- /dev/null +++ b/internal/upload/uploadparams.go @@ -0,0 +1,88 @@ +package upload + +import ( + "go.sia.tech/renterd/api" + "go.sia.tech/renterd/object" +) + +type Parameters struct { + Bucket string + Key string + + Multipart bool + UploadID string + PartNumber int + + EC object.EncryptionKey + EncryptionOffset uint64 + + RS api.RedundancySettings + BH uint64 + Packing bool + MimeType string + + Metadata api.ObjectUserMetadata +} + +func DefaultParameters(bucket, key string, rs api.RedundancySettings) Parameters { + return Parameters{ + Bucket: bucket, + Key: key, + + EC: object.GenerateEncryptionKey(object.EncryptionKeyTypeSalted), // random key + EncryptionOffset: 0, // from the beginning + + RS: rs, + } +} + +type Option func(*Parameters) + +func WithBlockHeight(bh uint64) Option { + return func(up *Parameters) { + up.BH = bh + } +} + +func WithCustomKey(ec object.EncryptionKey) Option { + return func(up *Parameters) { + up.EC = ec + } +} + +func WithCustomEncryptionOffset(offset uint64) Option { + return func(up *Parameters) { + up.EncryptionOffset = offset + } +} + +func 
WithMimeType(mimeType string) Option { + return func(up *Parameters) { + up.MimeType = mimeType + } +} + +func WithPacking(packing bool) Option { + return func(up *Parameters) { + up.Packing = packing + } +} + +func WithPartNumber(partNumber int) Option { + return func(up *Parameters) { + up.PartNumber = partNumber + } +} + +func WithUploadID(uploadID string) Option { + return func(up *Parameters) { + up.UploadID = uploadID + up.Multipart = true + } +} + +func WithObjectUserMetadata(metadata api.ObjectUserMetadata) Option { + return func(up *Parameters) { + up.Metadata = metadata + } +} diff --git a/worker/bench_test.go b/worker/bench_test.go index 3adb96b57..652690e51 100644 --- a/worker/bench_test.go +++ b/worker/bench_test.go @@ -24,20 +24,20 @@ func (z *zeroReader) Read(p []byte) (n int, err error) { // object. // 1036.74 MB/s | M2 Pro | c9dc1b6 func BenchmarkDownloaderSingleObject(b *testing.B) { - w := newTestWorker(b) + w := newTestWorker(b, newTestWorkerCfg()) up := testParameters(b.TempDir()) - up.rs.MinShards = 10 - up.rs.TotalShards = 30 - up.packing = false - w.AddHosts(up.rs.TotalShards) + up.RS.MinShards = 10 + up.RS.TotalShards = 30 + up.Packing = false + w.AddHosts(up.RS.TotalShards) - data := bytes.NewReader(frand.Bytes(int(up.rs.SlabSizeNoRedundancy()))) - _, _, err := w.uploadManager.Upload(context.Background(), data, w.Contracts(), up) + data := bytes.NewReader(frand.Bytes(int(up.RS.SlabSizeNoRedundancy()))) + _, _, err := w.uploadManager.Upload(context.Background(), data, w.UploadHosts(), up) if err != nil { b.Fatal(err) } - o, err := w.os.Object(context.Background(), testBucket, up.key, api.GetObjectOptions{}) + o, err := w.os.Object(context.Background(), testBucket, up.Key, api.GetObjectOptions{}) if err != nil { b.Fatal(err) } @@ -57,19 +57,19 @@ func BenchmarkDownloaderSingleObject(b *testing.B) { // Speed | CPU | Commit // 433.86 MB/s | M2 Pro | bae6e77 func BenchmarkUploaderSingleObject(b *testing.B) { - w := newTestWorker(b) + w := newTestWorker(b, newTestWorkerCfg()) up := testParameters(b.TempDir()) - up.rs.MinShards = 10 - up.rs.TotalShards = 30 - up.packing = false - w.AddHosts(up.rs.TotalShards) + up.RS.MinShards = 10 + up.RS.TotalShards = 30 + up.Packing = false + w.AddHosts(up.RS.TotalShards) - data := io.LimitReader(&zeroReader{}, int64(b.N*rhpv2.SectorSize*up.rs.MinShards)) - b.SetBytes(int64(rhpv2.SectorSize * up.rs.MinShards)) + data := io.LimitReader(&zeroReader{}, int64(b.N*rhpv2.SectorSize*up.RS.MinShards)) + b.SetBytes(int64(rhpv2.SectorSize * up.RS.MinShards)) b.ResetTimer() - _, _, err := w.uploadManager.Upload(context.Background(), data, w.Contracts(), up) + _, _, err := w.uploadManager.Upload(context.Background(), data, w.UploadHosts(), up) if err != nil { b.Fatal(err) } @@ -80,20 +80,20 @@ func BenchmarkUploaderSingleObject(b *testing.B) { // Speed | CPU | Commit // 282.47 MB/s | M2 Pro | bae6e77 func BenchmarkUploaderMultiObject(b *testing.B) { - w := newTestWorker(b) + w := newTestWorker(b, newTestWorkerCfg()) up := testParameters(b.TempDir()) - up.rs.MinShards = 10 - up.rs.TotalShards = 30 - up.packing = false - w.AddHosts(up.rs.TotalShards) + up.RS.MinShards = 10 + up.RS.TotalShards = 30 + up.Packing = false + w.AddHosts(up.RS.TotalShards) - b.SetBytes(int64(rhpv2.SectorSize * up.rs.MinShards)) + b.SetBytes(int64(rhpv2.SectorSize * up.RS.MinShards)) b.ResetTimer() for i := 0; i < b.N; i++ { - data := io.LimitReader(&zeroReader{}, int64(rhpv2.SectorSize*up.rs.MinShards)) - _, _, err := w.uploadManager.Upload(context.Background(), data, 
w.Contracts(), up) + data := io.LimitReader(&zeroReader{}, int64(rhpv2.SectorSize*up.RS.MinShards)) + _, _, err := w.uploadManager.Upload(context.Background(), data, w.UploadHosts(), up) if err != nil { b.Fatal(err) } diff --git a/worker/migrations.go b/worker/migrations.go index 41d3a2704..dc42ab6db 100644 --- a/worker/migrations.go +++ b/worker/migrations.go @@ -7,11 +7,12 @@ import ( rhpv2 "go.sia.tech/core/rhp/v2" "go.sia.tech/core/types" "go.sia.tech/renterd/api" + "go.sia.tech/renterd/internal/upload" "go.sia.tech/renterd/object" "go.uber.org/zap" ) -func (w *Worker) migrate(ctx context.Context, s object.Slab, dlHosts []api.HostInfo, ulContracts []hostContract, bh uint64) error { +func (w *Worker) migrate(ctx context.Context, s object.Slab, dlHosts []api.HostInfo, ulHosts []upload.HostInfo, bh uint64) error { // map usable hosts usableHosts := make(map[types.PublicKey]struct{}) for _, h := range dlHosts { @@ -20,8 +21,8 @@ func (w *Worker) migrate(ctx context.Context, s object.Slab, dlHosts []api.HostI // map usable contracts usableContracts := make(map[types.FileContractID]struct{}) - for _, c := range ulContracts { - usableContracts[c.ID] = struct{}{} + for _, c := range ulHosts { + usableContracts[c.ContractID] = struct{}{} } // collect indices of shards that need to be migrated @@ -69,15 +70,15 @@ SHARDS: } // perform some sanity checks - if len(ulContracts) < int(s.MinShards) { - return fmt.Errorf("not enough hosts to repair unhealthy shard to minimum redundancy, %d<%d", len(ulContracts), int(s.MinShards)) + if len(ulHosts) < int(s.MinShards) { + return fmt.Errorf("not enough hosts to repair unhealthy shard to minimum redundancy, %d<%d", len(ulHosts), int(s.MinShards)) } if len(s.Shards)-missingShards < int(s.MinShards) { return fmt.Errorf("not enough hosts to download unhealthy shard, %d<%d", len(s.Shards)-missingShards, int(s.MinShards)) } // acquire memory for the migration - mem := w.uploadManager.mm.AcquireMemory(ctx, uint64(len(shardIndices))*rhpv2.SectorSize) + mem := w.uploadManager.AcquireMemory(ctx, uint64(len(shardIndices))*rhpv2.SectorSize) if mem == nil { return fmt.Errorf("failed to acquire memory for migration") } @@ -102,11 +103,11 @@ SHARDS: shards = shards[:len(shardIndices)] // filter upload contracts to the ones we haven't used yet - var allowed []hostContract - for _, c := range ulContracts { - if _, used := seen[c.HostKey]; !used { - allowed = append(allowed, c) - seen[c.HostKey] = struct{}{} + var allowed []upload.HostInfo + for _, h := range ulHosts { + if _, used := seen[h.PublicKey]; !used { + allowed = append(allowed, h) + seen[h.PublicKey] = struct{}{} } } diff --git a/worker/upload.go b/worker/upload.go index bc0891c46..91b5f4d4b 100644 --- a/worker/upload.go +++ b/worker/upload.go @@ -2,26 +2,17 @@ package worker import ( "context" - "crypto/md5" - "encoding/hex" - "errors" "fmt" "io" - "math" "mime" "path/filepath" - "sort" "sync" "time" - rhpv2 "go.sia.tech/core/rhp/v2" "go.sia.tech/core/types" "go.sia.tech/renterd/api" - "go.sia.tech/renterd/internal/host" "go.sia.tech/renterd/internal/memory" - "go.sia.tech/renterd/internal/upload/uploader" - "go.sia.tech/renterd/internal/utils" - "go.sia.tech/renterd/object" + "go.sia.tech/renterd/internal/upload" "go.uber.org/zap" ) @@ -30,137 +21,20 @@ const ( defaultPackedSlabsUploadTimeout = 10 * time.Minute ) -var ( - errContractExpired = errors.New("contract expired") - errNoCandidateUploader = errors.New("no candidate uploader found") - errNotEnoughContracts = errors.New("not enough contracts to support 
requested redundancy") - errUploadInterrupted = errors.New("upload was interrupted") - errSectorUploadFinished = errors.New("sector upload already finished") -) - -type ( - uploadManager struct { - hm host.HostManager - hs HostStore - mm memory.MemoryManager - os ObjectStore - cl ContractLocker - cs ContractStore - uploadKey *utils.UploadKey - logger *zap.SugaredLogger - - maxOverdrive uint64 - overdriveTimeout time.Duration - - statsOverdrivePct *utils.DataPoints - statsSlabUploadSpeedBytesPerMS *utils.DataPoints - - shutdownCtx context.Context - - mu sync.Mutex - uploaders []*uploader.Uploader - } - - // TODO: should become a metric - uploadManagerStats struct { - avgSlabUploadSpeedMBPS float64 - avgOverdrivePct float64 - healthyUploaders uint64 - numUploaders uint64 - uploadSpeedsMBPS map[types.PublicKey]float64 - } - - upload struct { - id api.UploadID - allowed map[types.PublicKey]struct{} - os ObjectStore - shutdownCtx context.Context - } - - uploadedSector struct { - hk types.PublicKey - fcid types.FileContractID - root types.Hash256 - } - - slabUpload struct { - uploadID api.UploadID - - maxOverdrive uint64 - lastOverdrive time.Time - - sectors []*sectorUpload - candidates []*candidate // sorted by upload estimate - - numLaunched uint64 - numInflight uint64 - numOverdriving uint64 - numUploaded uint64 - numSectors uint64 - - mem memory.Memory - - errs utils.HostErrorSet - } - - candidate struct { - uploader *uploader.Uploader - req *uploader.SectorUploadReq - } - - slabUploadResponse struct { - slab object.SlabSlice - index int - err error - } - - sectorUpload struct { - index int - root types.Hash256 - - ctx context.Context - cancel context.CancelCauseFunc - - mu sync.Mutex - uploaded uploadedSector - data *[rhpv2.SectorSize]byte - } - - hostContract struct { - api.ContractMetadata - api.HostInfo - } -) - -func (us uploadedSector) toObjectSector() object.Sector { - return object.Sector{ - Contracts: map[types.PublicKey][]types.FileContractID{us.hk: {us.fcid}}, - Root: us.root, - } -} - -func (w *Worker) initUploadManager(uploadKey *utils.UploadKey, maxMemory, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) { - if w.uploadManager != nil { - panic("upload manager already initialized") // developer error - } - - w.uploadManager = newUploadManager(w.shutdownCtx, uploadKey, w, w.bus, w.bus, w.bus, w.bus, maxMemory, maxOverdrive, overdriveTimeout, logger) -} - -func (w *Worker) upload(ctx context.Context, bucket, key string, rs api.RedundancySettings, r io.Reader, contracts []hostContract, opts ...UploadOption) (_ string, err error) { +func (w *Worker) upload(ctx context.Context, bucket, key string, rs api.RedundancySettings, r io.Reader, hosts []upload.HostInfo, opts ...upload.Option) (_ string, err error) { // apply the options - up := defaultParameters(bucket, key, rs) + up := upload.DefaultParameters(bucket, key, rs) for _, opt := range opts { opt(&up) } // if not given, try decide on a mime type using the file extension - if !up.multipart && up.mimeType == "" { - up.mimeType = mime.TypeByExtension(filepath.Ext(up.key)) + if !up.Multipart && up.MimeType == "" { + up.MimeType = mime.TypeByExtension(filepath.Ext(up.Key)) // if mime type is still not known, wrap the reader with a mime reader - if up.mimeType == "" { - up.mimeType, r, err = newMimeReader(r) + if up.MimeType == "" { + up.MimeType, r, err = upload.NewMimeReader(r) if err != nil { return } @@ -168,30 +42,30 @@ func (w *Worker) upload(ctx context.Context, bucket, key string, rs api.Redundan } // 
perform the upload - bufferSizeLimitReached, eTag, err := w.uploadManager.Upload(ctx, r, contracts, up) + bufferSizeLimitReached, eTag, err := w.uploadManager.Upload(ctx, r, hosts, up) if err != nil { return "", err } // return early if worker was shut down or if we don't have to consider // packed uploads - if w.isStopped() || !up.packing { + if w.isStopped() || !up.Packing { return eTag, nil } // try and upload one slab synchronously if bufferSizeLimitReached { - mem := w.uploadManager.mm.AcquireMemory(ctx, up.rs.SlabSize()) + mem := w.uploadManager.AcquireMemory(ctx, up.RS.SlabSize()) if mem != nil { defer mem.Release() // fetch packed slab to upload - packedSlabs, err := w.bus.PackedSlabsForUpload(ctx, defaultPackedSlabsLockDuration, uint8(up.rs.MinShards), uint8(up.rs.TotalShards), 1) + packedSlabs, err := w.bus.PackedSlabsForUpload(ctx, defaultPackedSlabsLockDuration, uint8(up.RS.MinShards), uint8(up.RS.TotalShards), 1) if err != nil { w.logger.With(zap.Error(err)).Error("couldn't fetch packed slabs from bus") } else if len(packedSlabs) > 0 { // upload packed slab - if err := w.uploadPackedSlab(ctx, mem, packedSlabs[0], up.rs); err != nil { + if err := w.uploadPackedSlab(ctx, mem, packedSlabs[0], up.RS); err != nil { w.logger.With(zap.Error(err)).Error("failed to upload packed slab") } } @@ -199,7 +73,7 @@ func (w *Worker) upload(ctx context.Context, bucket, key string, rs api.Redundan } // make sure there's a goroutine uploading any packed slabs - go w.threadedUploadPackedSlabs(up.rs) + go w.threadedUploadPackedSlabs(up.RS) return eTag, nil } @@ -228,7 +102,7 @@ func (w *Worker) threadedUploadPackedSlabs(rs api.RedundancySettings) { var wg sync.WaitGroup for { // block until we have memory - mem := w.uploadManager.mm.AcquireMemory(interruptCtx, rs.SlabSize()) + mem := w.uploadManager.AcquireMemory(interruptCtx, rs.SlabSize()) if mem == nil { break // interrupted } @@ -270,14 +144,14 @@ func (w *Worker) threadedUploadPackedSlabs(rs api.RedundancySettings) { wg.Wait() } -func (w *Worker) hostContracts(ctx context.Context) (hcs []hostContract, _ error) { - hosts, err := w.bus.UsableHosts(ctx) +func (w *Worker) hostContracts(ctx context.Context) (hosts []upload.HostInfo, _ error) { + usableHosts, err := w.bus.UsableHosts(ctx) if err != nil { return nil, fmt.Errorf("couldn't fetch usable hosts from bus: %v", err) } hmap := make(map[types.PublicKey]api.HostInfo) - for _, h := range hosts { + for _, h := range usableHosts { hmap[h.PublicKey] = h } @@ -288,7 +162,12 @@ func (w *Worker) hostContracts(ctx context.Context) (hcs []hostContract, _ error for _, c := range contracts { if h, ok := hmap[c.HostKey]; ok { - hcs = append(hcs, hostContract{c, h}) + hosts = append(hosts, upload.HostInfo{ + HostInfo: h, + ContractEndHeight: c.WindowEnd, + ContractID: c.ID, + ContractRenewedFrom: c.RenewedFrom, + }) } } return @@ -318,785 +197,3 @@ func (w *Worker) uploadPackedSlab(ctx context.Context, mem memory.Memory, ps api return nil } - -func newUploadManager(ctx context.Context, uploadKey *utils.UploadKey, hm host.HostManager, hs HostStore, os ObjectStore, cl ContractLocker, cs ContractStore, maxMemory, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *uploadManager { - logger = logger.Named("uploadmanager") - return &uploadManager{ - hm: hm, - hs: hs, - mm: memory.NewManager(maxMemory, logger), - os: os, - cl: cl, - cs: cs, - uploadKey: uploadKey, - logger: logger.Sugar(), - - maxOverdrive: maxOverdrive, - overdriveTimeout: overdriveTimeout, - - statsOverdrivePct: 
utils.NewDataPoints(0), - statsSlabUploadSpeedBytesPerMS: utils.NewDataPoints(0), - - shutdownCtx: ctx, - - uploaders: make([]*uploader.Uploader, 0), - } -} - -func (mgr *uploadManager) Stats() uploadManagerStats { - mgr.mu.Lock() - defer mgr.mu.Unlock() - - var numHealthy uint64 - speeds := make(map[types.PublicKey]float64) - for _, u := range mgr.uploaders { - speeds[u.PublicKey()] = u.AvgUploadSpeedBytesPerMS() * 0.008 - if u.Healthy() { - numHealthy++ - } - } - - // prepare stats - return uploadManagerStats{ - avgSlabUploadSpeedMBPS: mgr.statsSlabUploadSpeedBytesPerMS.Average() * 0.008, // convert bytes per ms to mbps, - avgOverdrivePct: mgr.statsOverdrivePct.Average(), - healthyUploaders: numHealthy, - numUploaders: uint64(len(speeds)), - uploadSpeedsMBPS: speeds, - } -} - -func (mgr *uploadManager) Stop() { - mgr.mu.Lock() - defer mgr.mu.Unlock() - for _, u := range mgr.uploaders { - u.Stop(ErrShuttingDown) - } -} - -func (mgr *uploadManager) Upload(ctx context.Context, r io.Reader, contracts []hostContract, up uploadParameters) (bufferSizeLimitReached bool, eTag string, err error) { - // cancel all in-flight requests when the upload is done - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - // create the object - o := object.NewObject(up.ec) - - // create the md5 hasher for the etag - // NOTE: we use md5 since it's s3 compatible and clients expect it to be md5 - hasher := md5.New() - r = io.TeeReader(r, hasher) - - // create the cipher reader - cr, err := o.Encrypt(r, object.EncryptionOptions{ - Offset: up.encryptionOffset, - Key: mgr.uploadKey, - }) - if err != nil { - return false, "", err - } - - // create the upload - upload, err := mgr.newUpload(up.rs.TotalShards, contracts, up.bh) - if err != nil { - return false, "", err - } - - // track the upload in the bus - if err := mgr.os.TrackUpload(ctx, upload.id); err != nil { - return false, "", fmt.Errorf("failed to track upload '%v', err: %w", upload.id, err) - } - - // defer a function that finishes the upload - defer func() { - ctx, cancel := context.WithTimeout(mgr.shutdownCtx, time.Minute) - if err := mgr.os.FinishUpload(ctx, upload.id); err != nil && !errors.Is(err, context.Canceled) { - mgr.logger.Errorf("failed to mark upload %v as finished: %v", upload.id, err) - } - cancel() - }() - - // create the response channel - respChan := make(chan slabUploadResponse) - - // channel to notify main thread of the number of slabs to wait for - numSlabsChan := make(chan int, 1) - - // prepare slab sizes - slabSizeNoRedundancy := up.rs.SlabSizeNoRedundancy() - slabSize := up.rs.SlabSize() - var partialSlab []byte - - // launch uploads in a separate goroutine - go func() { - var slabIndex int - for { - select { - case <-mgr.shutdownCtx.Done(): - return // interrupted - case <-ctx.Done(): - return // interrupted - default: - } - // acquire memory - mem := mgr.mm.AcquireMemory(ctx, slabSize) - if mem == nil { - return // interrupted - } - - // read next slab's data - data := make([]byte, slabSizeNoRedundancy) - length, err := io.ReadFull(io.LimitReader(cr, int64(slabSizeNoRedundancy)), data) - if err == io.EOF { - mem.Release() - - // no more data to upload, notify main thread of the number of - // slabs to wait for - numSlabs := slabIndex - if partialSlab != nil && slabIndex > 0 { - numSlabs-- // don't wait on partial slab - } - numSlabsChan <- numSlabs - return - } else if err != nil && err != io.ErrUnexpectedEOF { - mem.Release() - - // unexpected error, notify main thread - select { - case respChan <- slabUploadResponse{err: 
err}: - case <-ctx.Done(): - } - return - } else if up.packing && errors.Is(err, io.ErrUnexpectedEOF) { - mem.Release() - - // uploadPacking is true, we return the partial slab without - // uploading. - partialSlab = data[:length] - } else { - // regular upload - go func(rs api.RedundancySettings, data []byte, length, slabIndex int) { - uploadSpeed, overdrivePct := upload.uploadSlab(ctx, rs, data, length, slabIndex, respChan, mgr.candidates(upload.allowed), mem, mgr.maxOverdrive, mgr.overdriveTimeout) - - // track stats - mgr.statsSlabUploadSpeedBytesPerMS.Track(float64(uploadSpeed)) - mgr.statsOverdrivePct.Track(overdrivePct) - - // release memory - mem.Release() - }(up.rs, data, length, slabIndex) - } - - slabIndex++ - } - }() - - // collect responses - var responses []slabUploadResponse - numSlabs := math.MaxInt32 - for len(responses) < numSlabs { - select { - case <-mgr.shutdownCtx.Done(): - return false, "", ErrShuttingDown - case <-ctx.Done(): - return false, "", errUploadInterrupted - case numSlabs = <-numSlabsChan: - case res := <-respChan: - if res.err != nil { - return false, "", res.err - } - responses = append(responses, res) - } - } - - // sort the slabs by index - sort.Slice(responses, func(i, j int) bool { - return responses[i].index < responses[j].index - }) - - // decorate the object with the slabs - for _, resp := range responses { - o.Slabs = append(o.Slabs, resp.slab) - } - - // compute etag - eTag = hex.EncodeToString(hasher.Sum(nil)) - - // add partial slabs - if len(partialSlab) > 0 { - var pss []object.SlabSlice - pss, bufferSizeLimitReached, err = mgr.os.AddPartialSlab(ctx, partialSlab, uint8(up.rs.MinShards), uint8(up.rs.TotalShards)) - if err != nil { - return false, "", err - } - o.Slabs = append(o.Slabs, pss...) - } - - if up.multipart { - // persist the part - err = mgr.os.AddMultipartPart(ctx, up.bucket, up.key, eTag, up.uploadID, up.partNumber, o.Slabs) - if err != nil { - return bufferSizeLimitReached, "", fmt.Errorf("couldn't add multi part: %w", err) - } - } else { - // persist the object - err = mgr.os.AddObject(ctx, up.bucket, up.key, o, api.AddObjectOptions{MimeType: up.mimeType, ETag: eTag, Metadata: up.metadata}) - if err != nil { - return bufferSizeLimitReached, "", fmt.Errorf("couldn't add object: %w", err) - } - } - - return -} - -func (mgr *uploadManager) UploadPackedSlab(ctx context.Context, rs api.RedundancySettings, ps api.PackedSlab, mem memory.Memory, contracts []hostContract, bh uint64) (err error) { - // cancel all in-flight requests when the upload is done - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - // build the shards - shards := encryptPartialSlab(ps.Data, ps.EncryptionKey, uint8(rs.MinShards), uint8(rs.TotalShards)) - - // create the upload - upload, err := mgr.newUpload(len(shards), contracts, bh) - if err != nil { - return err - } - - // track the upload in the bus - if err := mgr.os.TrackUpload(ctx, upload.id); err != nil { - return fmt.Errorf("failed to track upload '%v', err: %w", upload.id, err) - } - - // defer a function that finishes the upload - defer func() { - ctx, cancel := context.WithTimeout(mgr.shutdownCtx, time.Minute) - if err := mgr.os.FinishUpload(ctx, upload.id); err != nil { - mgr.logger.Errorf("failed to mark upload %v as finished: %v", upload.id, err) - } - cancel() - }() - - // upload the shards - uploaded, uploadSpeed, overdrivePct, err := upload.uploadShards(ctx, shards, mgr.candidates(upload.allowed), mem, mgr.maxOverdrive, mgr.overdriveTimeout) - if err != nil { - return err - } - - // 
build sectors - var sectors []api.UploadedSector - for _, sector := range uploaded { - sectors = append(sectors, api.UploadedSector{ - ContractID: sector.fcid, - Root: sector.root, - }) - } - - // track stats - mgr.statsSlabUploadSpeedBytesPerMS.Track(float64(uploadSpeed)) - mgr.statsOverdrivePct.Track(overdrivePct) - - // mark packed slab as uploaded - slab := api.UploadedPackedSlab{BufferID: ps.BufferID, Shards: sectors} - err = mgr.os.MarkPackedSlabsUploaded(ctx, []api.UploadedPackedSlab{slab}) - if err != nil { - return fmt.Errorf("couldn't mark packed slabs uploaded, err: %v", err) - } - - return nil -} - -func (mgr *uploadManager) UploadShards(ctx context.Context, s object.Slab, shardIndices []int, shards [][]byte, contracts []hostContract, bh uint64, mem memory.Memory) (err error) { - // cancel all in-flight requests when the upload is done - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - // create the upload - upload, err := mgr.newUpload(len(shards), contracts, bh) - if err != nil { - return err - } - - // track the upload in the bus - if err := mgr.os.TrackUpload(ctx, upload.id); err != nil { - return fmt.Errorf("failed to track upload '%v', err: %w", upload.id, err) - } - - // defer a function that finishes the upload - defer func() { - ctx, cancel := context.WithTimeout(mgr.shutdownCtx, time.Minute) - if err := mgr.os.FinishUpload(ctx, upload.id); err != nil { - mgr.logger.Errorf("failed to mark upload %v as finished: %v", upload.id, err) - } - cancel() - }() - - // upload the shards - uploaded, uploadSpeed, overdrivePct, err := upload.uploadShards(ctx, shards, mgr.candidates(upload.allowed), mem, mgr.maxOverdrive, mgr.overdriveTimeout) - if err != nil { - return err - } - - // build sectors - var sectors []api.UploadedSector - for _, sector := range uploaded { - sectors = append(sectors, api.UploadedSector{ - ContractID: sector.fcid, - Root: sector.root, - }) - } - - // track stats - mgr.statsOverdrivePct.Track(overdrivePct) - mgr.statsSlabUploadSpeedBytesPerMS.Track(float64(uploadSpeed)) - - // update the slab - return mgr.os.UpdateSlab(ctx, s.EncryptionKey, sectors) -} - -func (mgr *uploadManager) candidates(allowed map[types.PublicKey]struct{}) (candidates []*uploader.Uploader) { - mgr.mu.Lock() - defer mgr.mu.Unlock() - - for _, u := range mgr.uploaders { - if _, allowed := allowed[u.PublicKey()]; allowed { - candidates = append(candidates, u) - } - } - - // sort candidates by upload estimate - sort.Slice(candidates, func(i, j int) bool { - return candidates[i].Estimate() < candidates[j].Estimate() - }) - return -} - -func (mgr *uploadManager) newUpload(totalShards int, contracts []hostContract, bh uint64) (*upload, error) { - mgr.mu.Lock() - defer mgr.mu.Unlock() - - // refresh the uploaders - mgr.refreshUploaders(contracts, bh) - - // check if we have enough contracts - if len(contracts) < totalShards { - return nil, fmt.Errorf("%v < %v: %w", len(contracts), totalShards, errNotEnoughContracts) - } - - // create allowed map - allowed := make(map[types.PublicKey]struct{}) - for _, c := range contracts { - allowed[c.HostKey] = struct{}{} - } - - // create upload - return &upload{ - id: api.NewUploadID(), - allowed: allowed, - os: mgr.os, - shutdownCtx: mgr.shutdownCtx, - }, nil -} - -func (mgr *uploadManager) refreshUploaders(contracts []hostContract, bh uint64) { - // build map of renewals - renewals := make(map[types.FileContractID]hostContract) - for _, c := range contracts { - if c.RenewedFrom != (types.FileContractID{}) { - renewals[c.RenewedFrom] = c - } 
- } - - // refresh uploaders - var refreshed []*uploader.Uploader - existing := make(map[types.FileContractID]struct{}) - for _, uploader := range mgr.uploaders { - // refresh uploaders that got renewed - if renewal, renewed := renewals[uploader.ContractID()]; renewed { - uploader.Refresh(&renewal.HostInfo, renewal.ContractMetadata) - } - - // stop uploaders that expired - if uploader.Expired(bh) { - uploader.Stop(errContractExpired) - continue - } - - // recompute the stats - uploader.TryRecomputeStats() - - // add to the list - refreshed = append(refreshed, uploader) - existing[uploader.ContractID()] = struct{}{} - } - - // add missing uploaders - for _, c := range contracts { - if _, exists := existing[c.ID]; !exists && bh < c.WindowEnd { - uploader := uploader.New(mgr.shutdownCtx, mgr.cl, mgr.cs, mgr.hm, c.HostInfo, c.ContractMetadata, mgr.logger) - refreshed = append(refreshed, uploader) - go uploader.Start() - } - } - - mgr.uploaders = refreshed - return -} - -func (u *upload) newSlabUpload(ctx context.Context, shards [][]byte, uploaders []*uploader.Uploader, mem memory.Memory, maxOverdrive uint64) (*slabUpload, chan uploader.SectorUploadResp) { - // prepare response channel - responseChan := make(chan uploader.SectorUploadResp) - - // prepare sectors - var wg sync.WaitGroup - sectors := make([]*sectorUpload, len(shards)) - for sI := range shards { - wg.Add(1) - go func(idx int) { - // create the ctx - sCtx, sCancel := context.WithCancelCause(ctx) - - // create the sector - // NOTE: we are computing the sector root here and pass it all the - // way down to the RPC to avoid having to recompute it for the proof - // verification. This is necessary because we need it ahead of time - // for the call to AddUploadingSector in uploader.go - // Once we upload to temp storage we don't need AddUploadingSector - // anymore and can move it back to the RPC. 
- sectors[idx] = §orUpload{ - data: (*[rhpv2.SectorSize]byte)(shards[idx]), - index: idx, - root: rhpv2.SectorRoot((*[rhpv2.SectorSize]byte)(shards[idx])), - ctx: sCtx, - cancel: sCancel, - } - wg.Done() - }(sI) - } - wg.Wait() - - // prepare candidates - candidates := make([]*candidate, len(uploaders)) - for i, uploader := range uploaders { - candidates[i] = &candidate{uploader: uploader} - } - - // create slab upload - return &slabUpload{ - uploadID: u.id, - - maxOverdrive: maxOverdrive, - mem: mem, - - sectors: sectors, - candidates: candidates, - numSectors: uint64(len(shards)), - - errs: make(utils.HostErrorSet), - }, responseChan -} - -func (u *upload) uploadSlab(ctx context.Context, rs api.RedundancySettings, data []byte, length, index int, respChan chan slabUploadResponse, candidates []*uploader.Uploader, mem memory.Memory, maxOverdrive uint64, overdriveTimeout time.Duration) (int64, float64) { - // create the response - resp := slabUploadResponse{ - slab: object.SlabSlice{ - Slab: object.NewSlab(uint8(rs.MinShards)), - Offset: 0, - Length: uint32(length), - }, - index: index, - } - - // create the shards - shards := make([][]byte, rs.TotalShards) - resp.slab.Slab.Encode(data, shards) - resp.slab.Slab.Encrypt(shards) - - // upload the shards - uploaded, uploadSpeed, overdrivePct, err := u.uploadShards(ctx, shards, candidates, mem, maxOverdrive, overdriveTimeout) - - // build the sectors - var sectors []object.Sector - for _, sector := range uploaded { - sectors = append(sectors, sector.toObjectSector()) - } - - // decorate the response - resp.err = err - resp.slab.Shards = sectors - - // send the response - select { - case <-ctx.Done(): - case respChan <- resp: - } - - return uploadSpeed, overdrivePct -} - -func (u *upload) uploadShards(ctx context.Context, shards [][]byte, candidates []*uploader.Uploader, mem memory.Memory, maxOverdrive uint64, overdriveTimeout time.Duration) (sectors []uploadedSector, uploadSpeed int64, overdrivePct float64, err error) { - // ensure inflight uploads get cancelled - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - // prepare the upload - slab, respChan := u.newSlabUpload(ctx, shards, candidates, mem, maxOverdrive) - - // prepare requests - requests := make([]*uploader.SectorUploadReq, len(shards)) - roots := make([]types.Hash256, len(shards)) - for sI := range shards { - s := slab.sectors[sI] - requests[sI] = uploader.NewUploadRequest(s.ctx, s.data, sI, respChan, s.root, false) - roots[sI] = slab.sectors[sI].root - } - - // notify bus about roots - if err := u.os.AddUploadingSectors(ctx, u.id, roots); err != nil { - return nil, 0, 0, fmt.Errorf("failed to add sector to uploading sectors: %w", err) - } - - // launch all requests - for _, upload := range requests { - if err := slab.launch(upload); err != nil { - return nil, 0, 0, err - } - } - - // create an overdrive timer - if overdriveTimeout == 0 { - overdriveTimeout = time.Duration(math.MaxInt64) - } - timer := time.NewTimer(overdriveTimeout) - - // create a request buffer - var buffer []*uploader.SectorUploadReq - - // start the timer after the upload has started - // newSlabUpload is quite slow due to computing the sector roots - start := time.Now() - - // collect responses - var used bool - var done bool -loop: - for slab.numInflight > 0 && !done { - select { - case <-u.shutdownCtx.Done(): - return nil, 0, 0, ErrShuttingDown - case <-ctx.Done(): - return nil, 0, 0, context.Cause(ctx) - case resp := <-respChan: - // receive the response - used, done = slab.receive(resp) - if done { 
- break loop - } - - // relaunch non-overdrive uploads - if resp.Err != nil && !resp.Req.Overdrive { - if err := slab.launch(resp.Req); err != nil { - // a failure to relaunch non-overdrive uploads is bad, but - // we need to keep them around because an overdrive upload - // might've been redundant, in which case we can re-use the - // host to launch this request - buffer = append(buffer, resp.Req) - } - } else if resp.Err == nil && !used { - if len(buffer) > 0 { - // relaunch buffered upload request - if err := slab.launch(buffer[0]); err == nil { - buffer = buffer[1:] - } - } else if slab.canOverdrive(overdriveTimeout) { - // or try overdriving a sector - _ = slab.launch(slab.nextRequest(respChan)) - } - } - case <-timer.C: - // try overdriving a sector - if slab.canOverdrive(overdriveTimeout) { - _ = slab.launch(slab.nextRequest(respChan)) // ignore result - } - } - - // reset the overdrive timer - if overdriveTimeout != math.MaxInt64 { - if !timer.Stop() { - select { - case <-timer.C: - default: - } - } - timer.Reset(overdriveTimeout) - } - } - - // calculate the upload speed - bytes := slab.numUploaded * rhpv2.SectorSize - ms := time.Since(start).Milliseconds() - if ms == 0 { - ms = 1 - } - uploadSpeed = int64(bytes) / ms - - // calculate overdrive pct - var numOverdrive uint64 - if slab.numLaunched > slab.numSectors { - numOverdrive = slab.numLaunched - slab.numSectors - } - overdrivePct = float64(numOverdrive) / float64(slab.numSectors) - - if slab.numUploaded < slab.numSectors { - remaining := slab.numSectors - slab.numUploaded - err = fmt.Errorf("failed to upload slab: launched=%d uploaded=%d remaining=%d inflight=%d pending=%d uploaders=%d errors=%d %w", slab.numLaunched, slab.numUploaded, remaining, slab.numInflight, len(buffer), len(slab.candidates), len(slab.errs), slab.errs) - return - } - - // collect the sectors - for _, sector := range slab.sectors { - sectors = append(sectors, sector.uploaded) - } - return -} - -func (s *slabUpload) canOverdrive(overdriveTimeout time.Duration) bool { - // overdrive is not kicking in yet - remaining := s.numSectors - s.numUploaded - if remaining > s.maxOverdrive { - return false - } - - // overdrive is not due yet - if time.Since(s.lastOverdrive) < overdriveTimeout { - return false - } - - // overdrive is maxed out - if s.numInflight-remaining >= s.maxOverdrive { - return false - } - - return true -} - -func (s *slabUpload) launch(req *uploader.SectorUploadReq) error { - // nothing to do - if req == nil { - return nil - } - - // find candidate - var candidate *candidate - for _, c := range s.candidates { - if c.req != nil { - continue - } - candidate = c - break - } - - // no candidate found - if candidate == nil { - return errNoCandidateUploader - } - - // update the candidate - candidate.req = req - if req.Overdrive { - s.lastOverdrive = time.Now() - s.numOverdriving++ - } - // update the state - s.numInflight++ - s.numLaunched++ - - // enqueue the req - candidate.uploader.Enqueue(req) - return nil -} - -func (s *slabUpload) nextRequest(responseChan chan uploader.SectorUploadResp) *uploader.SectorUploadReq { - // count overdrives - overdriveCnts := make(map[int]int) - for _, c := range s.candidates { - if c.req != nil && c.req.Overdrive { - overdriveCnts[c.req.Idx]++ - } - } - - // overdrive the sector with the least amount of overdrives - lowestNumOverdrives := math.MaxInt - var nextSector *sectorUpload - for sI, sector := range s.sectors { - if !sector.isUploaded() && overdriveCnts[sI] < lowestNumOverdrives { - lowestNumOverdrives = 
overdriveCnts[sI] - nextSector = sector - } - } - if nextSector == nil { - return nil - } - - return uploader.NewUploadRequest(nextSector.ctx, nextSector.data, nextSector.index, responseChan, nextSector.root, true) -} - -func (s *slabUpload) receive(resp uploader.SectorUploadResp) (bool, bool) { - // convenience variable - req := resp.Req - sector := s.sectors[req.Idx] - - // update the state - if req.Overdrive { - s.numOverdriving-- - } - s.numInflight-- - - // redundant sectors can't complete the upload - if sector.isUploaded() { - // release the candidate - for _, candidate := range s.candidates { - if candidate.req == req { - candidate.req = nil - break - } - } - return false, false - } - - // failed reqs can't complete the upload, we do this after the isUploaded - // check since any error returned for a redundant sector is probably a - // result of the sector ctx being closed - if resp.Err != nil { - s.errs[resp.HK] = resp.Err - return false, false - } - - // store the sector - sector.finish(resp) - - // update uploaded sectors - s.numUploaded++ - - // release memory - s.mem.ReleaseSome(rhpv2.SectorSize) - - return true, s.numUploaded == s.numSectors -} - -func (s *sectorUpload) finish(resp uploader.SectorUploadResp) { - s.mu.Lock() - defer s.mu.Unlock() - - s.cancel(errSectorUploadFinished) - s.uploaded = uploadedSector{ - hk: resp.HK, - fcid: resp.FCID, - root: resp.Req.Root, - } - s.data = nil -} - -func (s *sectorUpload) isUploaded() bool { - return s.uploaded.root != (types.Hash256{}) -} diff --git a/worker/upload_params.go b/worker/upload_params.go deleted file mode 100644 index 6b2b7cb1f..000000000 --- a/worker/upload_params.go +++ /dev/null @@ -1,88 +0,0 @@ -package worker - -import ( - "go.sia.tech/renterd/api" - "go.sia.tech/renterd/object" -) - -type uploadParameters struct { - bucket string - key string - - multipart bool - uploadID string - partNumber int - - ec object.EncryptionKey - encryptionOffset uint64 - - rs api.RedundancySettings - bh uint64 - packing bool - mimeType string - - metadata api.ObjectUserMetadata -} - -func defaultParameters(bucket, key string, rs api.RedundancySettings) uploadParameters { - return uploadParameters{ - bucket: bucket, - key: key, - - ec: object.GenerateEncryptionKey(object.EncryptionKeyTypeSalted), // random key - encryptionOffset: 0, // from the beginning - - rs: rs, - } -} - -type UploadOption func(*uploadParameters) - -func WithBlockHeight(bh uint64) UploadOption { - return func(up *uploadParameters) { - up.bh = bh - } -} - -func WithCustomKey(ec object.EncryptionKey) UploadOption { - return func(up *uploadParameters) { - up.ec = ec - } -} - -func WithCustomEncryptionOffset(offset uint64) UploadOption { - return func(up *uploadParameters) { - up.encryptionOffset = offset - } -} - -func WithMimeType(mimeType string) UploadOption { - return func(up *uploadParameters) { - up.mimeType = mimeType - } -} - -func WithPacking(packing bool) UploadOption { - return func(up *uploadParameters) { - up.packing = packing - } -} - -func WithPartNumber(partNumber int) UploadOption { - return func(up *uploadParameters) { - up.partNumber = partNumber - } -} - -func WithUploadID(uploadID string) UploadOption { - return func(up *uploadParameters) { - up.uploadID = uploadID - up.multipart = true - } -} - -func WithObjectUserMetadata(metadata api.ObjectUserMetadata) UploadOption { - return func(up *uploadParameters) { - up.metadata = metadata - } -} diff --git a/worker/upload_test.go b/worker/upload_test.go index 916e8d146..198cefea0 100644 --- 
a/worker/upload_test.go +++ b/worker/upload_test.go @@ -13,6 +13,7 @@ import ( "go.sia.tech/renterd/api" "go.sia.tech/renterd/internal/download" "go.sia.tech/renterd/internal/test" + "go.sia.tech/renterd/internal/upload" "go.sia.tech/renterd/object" "lukechampine.com/frand" ) @@ -24,7 +25,7 @@ var ( func TestUpload(t *testing.T) { // create test worker - w := newTestWorker(t) + w := newTestWorker(t, newTestWorkerCfg()) // add hosts to worker w.AddHosts(testRedundancySettings.TotalShards * 2) @@ -41,7 +42,7 @@ func TestUpload(t *testing.T) { params := testParameters(t.Name()) // upload data - _, _, err := ul.Upload(context.Background(), bytes.NewReader(data), w.Contracts(), params) + _, _, err := ul.Upload(context.Background(), bytes.NewReader(data), w.UploadHosts(), params) if err != nil { t.Fatal(err) } @@ -72,24 +73,24 @@ func TestUpload(t *testing.T) { // filter contracts to have (at most) min shards used contracts var n int var filtered []api.HostInfo - for _, md := range w.Contracts() { + for _, h := range w.UploadHosts() { // add unused contracts - host, err := w.bus.Host(context.Background(), md.HostKey) + host, err := w.bus.Host(context.Background(), h.PublicKey) if err != nil { t.Fatal(err) } - if _, used := used[md.HostKey]; !used { + if _, used := used[h.PublicKey]; !used { filtered = append(filtered, api.HostInfo{ - PublicKey: md.HostKey, + PublicKey: h.PublicKey, SiamuxAddr: host.Settings.SiamuxAddr(), }) continue } // add min shards used contracts - if n < int(params.rs.MinShards) { + if n < int(params.RS.MinShards) { filtered = append(filtered, api.HostInfo{ - PublicKey: md.HostKey, + PublicKey: h.PublicKey, SiamuxAddr: host.Settings.SiamuxAddr(), }) n++ @@ -121,8 +122,8 @@ func TestUpload(t *testing.T) { } // try and upload into a bucket that does not exist - params.bucket = "doesnotexist" - _, _, err = ul.Upload(context.Background(), bytes.NewReader(data), w.Contracts(), params) + params.Bucket = "doesnotexist" + _, _, err = ul.Upload(context.Background(), bytes.NewReader(data), w.UploadHosts(), params) if !errors.Is(err, api.ErrBucketNotFound) { t.Fatal("expected bucket not found error", err) } @@ -130,15 +131,15 @@ func TestUpload(t *testing.T) { // upload data using a cancelled context - assert we don't hang ctx, cancel := context.WithCancel(context.Background()) cancel() - _, _, err = ul.Upload(ctx, bytes.NewReader(data), w.Contracts(), params) - if err == nil || !errors.Is(err, errUploadInterrupted) { + _, _, err = ul.Upload(ctx, bytes.NewReader(data), w.UploadHosts(), params) + if err == nil || !errors.Is(err, upload.ErrUploadCancelled) { t.Fatal(err) } } func TestUploadPackedSlab(t *testing.T) { // create test worker - w := newTestWorker(t) + w := newTestWorker(t, newTestWorkerCfg()) // add hosts to worker w.AddHosts(testRedundancySettings.TotalShards) @@ -151,14 +152,13 @@ func TestUploadPackedSlab(t *testing.T) { // create upload params params := testParameters(t.Name()) - params.packing = true - opts := []UploadOption{WithPacking(true)} + opts := []upload.Option{upload.WithPacking(true)} // create test data data := frand.Bytes(128) // upload data - _, _, err := ul.Upload(context.Background(), bytes.NewReader(data), w.Contracts(), params) + _, _, err := ul.Upload(context.Background(), bytes.NewReader(data), w.UploadHosts(), params) if err != nil { t.Fatal(err) } @@ -184,7 +184,7 @@ func TestUploadPackedSlab(t *testing.T) { } // fetch packed slabs for upload - pss, err := os.PackedSlabsForUpload(context.Background(), time.Minute, uint8(params.rs.MinShards), 
uint8(params.rs.TotalShards), 1) + pss, err := os.PackedSlabsForUpload(context.Background(), time.Minute, uint8(params.RS.MinShards), uint8(params.RS.TotalShards), 1) if err != nil { t.Fatal(err) } else if len(pss) != 1 { @@ -193,8 +193,8 @@ func TestUploadPackedSlab(t *testing.T) { ps := pss[0] // upload the packed slab - mem := mm.AcquireMemory(context.Background(), params.rs.SlabSize()) - err = ul.UploadPackedSlab(context.Background(), params.rs, ps, mem, w.Contracts(), 0) + mem := mm.AcquireMemory(context.Background(), params.RS.SlabSize()) + err = ul.UploadPackedSlab(context.Background(), params.RS, ps, mem, w.UploadHosts(), 0) if err != nil { t.Fatal(err) } @@ -230,8 +230,8 @@ func TestUploadPackedSlab(t *testing.T) { var c int uploadBytes := func(n int) { t.Helper() - params.key = fmt.Sprintf("%s_%d", t.Name(), c) - _, err := w.upload(context.Background(), params.bucket, params.key, testRedundancySettings, bytes.NewReader(frand.Bytes(n)), w.Contracts(), opts...) + params.Key = fmt.Sprintf("%s_%d", t.Name(), c) + _, err := w.upload(context.Background(), params.Bucket, params.Key, testRedundancySettings, bytes.NewReader(frand.Bytes(n)), w.UploadHosts(), opts...) if err != nil { t.Fatal(err) } @@ -276,7 +276,7 @@ func TestUploadPackedSlab(t *testing.T) { func TestMigrateLostSector(t *testing.T) { // create test worker - w := newTestWorker(t) + w := newTestWorker(t, newTestWorkerCfg()) // add hosts to worker w.AddHosts(testRedundancySettings.TotalShards * 2) @@ -294,7 +294,7 @@ func TestMigrateLostSector(t *testing.T) { params := testParameters(t.Name()) // upload data - _, _, err := ul.Upload(context.Background(), bytes.NewReader(data), w.Contracts(), params) + _, _, err := ul.Upload(context.Background(), bytes.NewReader(data), w.UploadHosts(), params) if err != nil { t.Fatal(err) } @@ -340,12 +340,12 @@ func TestMigrateLostSector(t *testing.T) { // filter it down to the shards we need to migrate shards = shards[:1] - // recreate upload contracts - contracts := make([]hostContract, 0) - for _, c := range w.Contracts() { - _, used := usedHosts[c.HostKey] - if !used && c.HostKey != badHost { - contracts = append(contracts, c) + // recreate upload hosts + hosts := make([]upload.HostInfo, 0) + for _, h := range w.UploadHosts() { + _, used := usedHosts[h.PublicKey] + if !used && h.PublicKey != badHost { + hosts = append(hosts, h) } } @@ -357,7 +357,7 @@ func TestMigrateLostSector(t *testing.T) { // migrate the shard away from the bad host mem := mm.AcquireMemory(context.Background(), rhpv2.SectorSize) - err = ul.UploadShards(context.Background(), o.Object.Slabs[0].Slab, []int{0}, shards, contracts, 0, mem) + err = ul.UploadShards(context.Background(), o.Object.Slabs[0].Slab, []int{0}, shards, hosts, 0, mem) if err != nil { t.Fatal(err) } @@ -392,7 +392,7 @@ func TestMigrateLostSector(t *testing.T) { func TestUploadShards(t *testing.T) { // create test worker - w := newTestWorker(t) + w := newTestWorker(t, newTestWorkerCfg()) // add hosts to worker w.AddHosts(testRedundancySettings.TotalShards * 2) @@ -410,7 +410,7 @@ func TestUploadShards(t *testing.T) { params := testParameters(t.Name()) // upload data - _, _, err := ul.Upload(context.Background(), bytes.NewReader(data), w.Contracts(), params) + _, _, err := ul.Upload(context.Background(), bytes.NewReader(data), w.UploadHosts(), params) if err != nil { t.Fatal(err) } @@ -459,19 +459,19 @@ func TestUploadShards(t *testing.T) { } shards = shards[:len(badIndices)] - // recreate upload contracts - contracts := make([]hostContract, 0) - for _, 
c := range w.Contracts() { - _, used := usedHosts[c.HostKey] - _, bad := badHosts[c.HostKey] + // recreate upload hosts + hosts := make([]upload.HostInfo, 0) + for _, h := range w.UploadHosts() { + _, used := usedHosts[h.PublicKey] + _, bad := badHosts[h.PublicKey] if !used && !bad { - contracts = append(contracts, c) + hosts = append(hosts, h) } } // migrate those shards away from bad hosts mem := mm.AcquireMemory(context.Background(), uint64(len(badIndices))*rhpv2.SectorSize) - err = ul.UploadShards(context.Background(), o.Object.Slabs[0].Slab, badIndices, shards, contracts, 0, mem) + err = ul.UploadShards(context.Background(), o.Object.Slabs[0].Slab, badIndices, shards, hosts, 0, mem) if err != nil { t.Fatal(err) } @@ -508,15 +508,15 @@ func TestUploadShards(t *testing.T) { } // create download contracts - var hosts []api.HostInfo - for _, c := range w.Contracts() { - if _, bad := badHosts[c.HostKey]; !bad { - host, err := w.bus.Host(context.Background(), c.HostKey) + var infos []api.HostInfo + for _, h := range w.UploadHosts() { + if _, bad := badHosts[h.PublicKey]; !bad { + host, err := w.bus.Host(context.Background(), h.PublicKey) if err != nil { t.Fatal(err) } - hosts = append(hosts, api.HostInfo{ - PublicKey: c.HostKey, + infos = append(infos, api.HostInfo{ + PublicKey: h.PublicKey, SiamuxAddr: host.Settings.SiamuxAddr(), }) } @@ -524,7 +524,7 @@ func TestUploadShards(t *testing.T) { // download the data and assert it matches var buf bytes.Buffer - err = dl.DownloadObject(context.Background(), &buf, *o.Object, 0, uint64(o.Size), hosts) + err = dl.DownloadObject(context.Background(), &buf, *o.Object, 0, uint64(o.Size), infos) if err != nil { t.Fatal(err) } else if !bytes.Equal(data, buf.Bytes()) { @@ -532,80 +532,41 @@ func TestUploadShards(t *testing.T) { } } -func TestRefreshUploaders(t *testing.T) { +func TestUploadSingleSectorSlowHosts(t *testing.T) { // create test worker - w := newTestWorker(t) + cfg := newTestWorkerCfg() + slowHosts := 5 + cfg.UploadMaxOverdrive = uint64(slowHosts) + cfg.UploadOverdriveTimeout = time.Second - // add hosts to worker - w.AddHosts(testRedundancySettings.TotalShards) + w := newTestWorker(t, cfg) - // convenience variables - ul := w.uploadManager - cs := w.cs - hm := w.hm - bh := uint64(1) - - // refresh uploaders - contracts := w.Contracts() - ul.refreshUploaders(contracts, bh) - - // assert we have the expected number of uploaders - if len(ul.uploaders) != len(contracts) { - t.Fatalf("unexpected number of uploaders, %v != %v", len(ul.uploaders), len(contracts)) - } - - // renew the first contract - c1 := contracts[0] - c1Renewed := w.RenewContract(c1.HostKey) - - // remove the host from the second contract - c2 := contracts[1] - delete(hm.hosts, c2.HostKey) - cs.DeleteContracdt(c2.ID) - - // add a new host/contract - hNew := w.AddHost() - - // refresh uploaders - contracts = w.Contracts() - ul.refreshUploaders(contracts, bh) - - // assert we added and renewed exactly one uploader - var added, renewed int - for _, ul := range ul.uploaders { - switch ul.ContractID() { - case hNew.ID(): - added++ - case c1Renewed.ID(): - renewed++ - default: - } - } - if added != 1 { - t.Fatalf("expected 1 added uploader, got %v", added) - } else if renewed != 1 { - t.Fatalf("expected 1 renewed uploader, got %v", renewed) + // add hosts to worker + minShards := 10 + totalShards := 30 + hosts := w.AddHosts(totalShards + slowHosts) + for i := 0; i < slowHosts; i++ { + hosts[i].uploadDelay = time.Hour } - // assert we have one more uploader than we used to - if 
len(ul.uploaders) != len(contracts)+1 { - t.Fatalf("unexpected number of uploaders, %v != %v", len(ul.uploaders), len(contracts)+1) - } + // create test data + data := frand.Bytes(rhpv2.SectorSize * minShards) - // refresh uploaders, use blockheight that expires most uploaders - bh = c1.WindowEnd - contracts = w.Contracts() - ul.refreshUploaders(contracts, bh) + // create upload params + params := testParameters(t.Name()) + params.RS.MinShards = minShards + params.RS.TotalShards = totalShards - // assert we only have one uploader left - if len(ul.uploaders) != 1 { - t.Fatalf("unexpected number of uploaders, %v != %v", len(ul.uploaders), 1) + // upload data + _, _, err := w.uploadManager.Upload(context.Background(), bytes.NewReader(data), w.UploadHosts(), params) + if err != nil { + t.Fatal(err) } } func TestUploadRegression(t *testing.T) { // create test worker - w := newTestWorker(t) + w := newTestWorker(t, newTestWorkerCfg()) // add hosts to worker w.AddHosts(testRedundancySettings.TotalShards) @@ -626,8 +587,8 @@ func TestUploadRegression(t *testing.T) { // upload data ctx, cancel := context.WithTimeout(context.Background(), time.Second) defer cancel() - _, err := w.upload(ctx, params.bucket, params.key, testRedundancySettings, bytes.NewReader(data), w.Contracts()) - if !errors.Is(err, errUploadInterrupted) { + _, err := w.upload(ctx, params.Bucket, params.Key, testRedundancySettings, bytes.NewReader(data), w.UploadHosts()) + if !errors.Is(err, upload.ErrUploadCancelled) { t.Fatal(err) } @@ -635,7 +596,7 @@ func TestUploadRegression(t *testing.T) { unblock() // upload data - _, err = w.upload(context.Background(), params.bucket, params.key, testRedundancySettings, bytes.NewReader(data), w.Contracts()) + _, err = w.upload(context.Background(), params.Bucket, params.Key, testRedundancySettings, bytes.NewReader(data), w.UploadHosts()) if err != nil { t.Fatal(err) } @@ -656,45 +617,14 @@ func TestUploadRegression(t *testing.T) { } } -func TestUploadSingleSectorSlowHosts(t *testing.T) { - // create test worker - w := newTestWorker(t) - - // add hosts to worker - minShards := 10 - totalShards := 30 - slowHosts := 5 - w.uploadManager.maxOverdrive = uint64(slowHosts) - w.uploadManager.overdriveTimeout = time.Second - hosts := w.AddHosts(totalShards + slowHosts) - - for i := 0; i < slowHosts; i++ { - hosts[i].uploadDelay = time.Hour - } - - // create test data - data := frand.Bytes(rhpv2.SectorSize * minShards) - - // create upload params - params := testParameters(t.Name()) - params.rs.MinShards = minShards - params.rs.TotalShards = totalShards - - // upload data - _, _, err := w.uploadManager.Upload(context.Background(), bytes.NewReader(data), w.Contracts(), params) - if err != nil { - t.Fatal(err) - } -} - -func testParameters(key string) uploadParameters { - return uploadParameters{ - bucket: testBucket, - key: key, +func testParameters(key string) upload.Parameters { + return upload.Parameters{ + Bucket: testBucket, + Key: key, - ec: object.GenerateEncryptionKey(object.EncryptionKeyTypeBasic), // random key - encryptionOffset: 0, // from the beginning + EC: object.GenerateEncryptionKey(object.EncryptionKeyTypeBasic), // random key + EncryptionOffset: 0, // from the beginning - rs: testRedundancySettings, + RS: testRedundancySettings, } } diff --git a/worker/upload_utils.go b/worker/upload_utils.go deleted file mode 100644 index 6dfc9b729..000000000 --- a/worker/upload_utils.go +++ /dev/null @@ -1,28 +0,0 @@ -package worker - -import ( - "bytes" - "io" - - 
"github.com/gabriel-vasile/mimetype" - "go.sia.tech/renterd/object" -) - -func encryptPartialSlab(data []byte, key object.EncryptionKey, minShards, totalShards uint8) [][]byte { - slab := object.Slab{ - EncryptionKey: key, - MinShards: minShards, - Shards: make([]object.Sector, totalShards), - } - encodedShards := make([][]byte, totalShards) - slab.Encode(data, encodedShards) - slab.Encrypt(encodedShards) - return encodedShards -} - -func newMimeReader(r io.Reader) (mimeType string, recycled io.Reader, err error) { - buf := bytes.NewBuffer(nil) - mtype, err := mimetype.DetectReader(io.TeeReader(r, buf)) - recycled = io.MultiReader(buf, r) - return mtype.String(), recycled, err -} diff --git a/worker/worker.go b/worker/worker.go index ef26b7242..7beabd0f4 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -31,6 +31,7 @@ import ( rhp2 "go.sia.tech/renterd/internal/rhp/v2" rhp3 "go.sia.tech/renterd/internal/rhp/v3" rhp4 "go.sia.tech/renterd/internal/rhp/v4" + "go.sia.tech/renterd/internal/upload" "go.sia.tech/renterd/internal/utils" iworker "go.sia.tech/renterd/internal/worker" "go.sia.tech/renterd/object" @@ -172,7 +173,7 @@ type Worker struct { startTime time.Time downloadManager *download.Manager - uploadManager *uploadManager + uploadManager *upload.Manager accounts *iworker.AccountMgr dialer *rhp.FallbackDialer @@ -308,7 +309,7 @@ func (w *Worker) uploadsStatsHandlerGET(jc jape.Context) { // prepare upload stats var uss []api.UploaderStats - for hk, mbps := range stats.uploadSpeedsMBPS { + for hk, mbps := range stats.UploadSpeedsMBPS { uss = append(uss, api.UploaderStats{ HostKey: hk, AvgSectorUploadSpeedMBPS: mbps, @@ -320,10 +321,10 @@ func (w *Worker) uploadsStatsHandlerGET(jc jape.Context) { // encode response api.WriteResponse(jc, api.UploadStatsResponse{ - AvgSlabUploadSpeedMBPS: math.Ceil(stats.avgSlabUploadSpeedMBPS*100) / 100, - AvgOverdrivePct: math.Floor(stats.avgOverdrivePct*100*100) / 100, - HealthyUploaders: stats.healthyUploaders, - NumUploaders: stats.numUploaders, + AvgSlabUploadSpeedMBPS: math.Ceil(stats.AvgSlabUploadSpeedMBPS*100) / 100, + AvgOverdrivePct: math.Floor(stats.AvgOverdrivePct*100*100) / 100, + HealthyUploaders: stats.HealthyUploaders, + NumUploaders: stats.NumUploaders, UploadersStats: uss, }) } @@ -625,7 +626,7 @@ func (w *Worker) idHandlerGET(jc jape.Context) { func (w *Worker) memoryGET(jc jape.Context) { api.WriteResponse(jc, api.MemoryResponse{ Download: w.downloadManager.MemoryStatus(), - Upload: w.uploadManager.mm.Status(), + Upload: w.uploadManager.MemoryStatus(), }) } @@ -722,7 +723,7 @@ func New(cfg config.Worker, masterKey [32]byte, b Bus, l *zap.Logger) (*Worker, uploadKey := w.masterKey.DeriveUploadKey() w.downloadManager = download.NewManager(w.shutdownCtx, &uploadKey, w, w.bus, cfg.UploadMaxMemory, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) - w.initUploadManager(&uploadKey, cfg.UploadMaxMemory, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) + w.uploadManager = upload.NewManager(w.shutdownCtx, &uploadKey, w, w.bus, w.bus, w.bus, cfg.UploadMaxMemory, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) w.initContractSpendingRecorder(cfg.BusFlushInterval) return w, nil @@ -948,14 +949,14 @@ func (w *Worker) UploadObject(ctx context.Context, r io.Reader, bucket, key stri // upload eTag, err := w.upload(ctx, bucket, key, up.RedundancySettings, r, contracts, - WithBlockHeight(up.CurrentHeight), - WithMimeType(opts.MimeType), - WithPacking(up.UploadPacking), - WithObjectUserMetadata(opts.Metadata), + 
upload.WithBlockHeight(up.CurrentHeight), + upload.WithMimeType(opts.MimeType), + upload.WithPacking(up.UploadPacking), + upload.WithObjectUserMetadata(opts.Metadata), ) if err != nil { w.logger.With(zap.Error(err)).With("key", key).With("bucket", bucket).Error("failed to upload object") - if !errors.Is(err, ErrShuttingDown) && !errors.Is(err, errUploadInterrupted) && !errors.Is(err, context.Canceled) { + if !errors.Is(err, ErrShuttingDown) && !errors.Is(err, upload.ErrUploadCancelled) && !errors.Is(err, context.Canceled) { w.registerAlert(newUploadFailedAlert(bucket, key, opts.MimeType, up.RedundancySettings.MinShards, up.RedundancySettings.TotalShards, len(contracts), up.UploadPacking, false, err)) } return nil, fmt.Errorf("couldn't upload object: %w", err) @@ -973,7 +974,7 @@ func (w *Worker) UploadMultipartUploadPart(ctx context.Context, r io.Reader, buc } // fetch upload from bus - upload, err := w.bus.MultipartUpload(ctx, uploadID) + mu, err := w.bus.MultipartUpload(ctx, uploadID) if err != nil { return nil, fmt.Errorf("couldn't fetch multipart upload: %w", err) } @@ -982,21 +983,21 @@ func (w *Worker) UploadMultipartUploadPart(ctx context.Context, r io.Reader, buc ctx = WithGougingChecker(ctx, w.bus, up.GougingParams) // prepare opts - uploadOpts := []UploadOption{ - WithBlockHeight(up.CurrentHeight), - WithPacking(up.UploadPacking), - WithCustomKey(upload.EncryptionKey), - WithPartNumber(partNumber), - WithUploadID(uploadID), + uploadOpts := []upload.Option{ + upload.WithBlockHeight(up.CurrentHeight), + upload.WithPacking(up.UploadPacking), + upload.WithCustomKey(mu.EncryptionKey), + upload.WithPartNumber(partNumber), + upload.WithUploadID(uploadID), } // make sure only one of the following is set - if encryptionEnabled := !upload.EncryptionKey.IsNoopKey(); encryptionEnabled && opts.EncryptionOffset == nil { + if encryptionEnabled := !mu.EncryptionKey.IsNoopKey(); encryptionEnabled && opts.EncryptionOffset == nil { return nil, fmt.Errorf("%w: if object encryption (pre-erasure coding) wasn't disabled by creating the multipart upload with the no-op key, the offset needs to be set", api.ErrInvalidMultipartEncryptionSettings) } else if opts.EncryptionOffset != nil && *opts.EncryptionOffset < 0 { return nil, fmt.Errorf("%w: encryption offset must be positive", api.ErrInvalidMultipartEncryptionSettings) } else if encryptionEnabled { - uploadOpts = append(uploadOpts, WithCustomEncryptionOffset(uint64(*opts.EncryptionOffset))) + uploadOpts = append(uploadOpts, upload.WithCustomEncryptionOffset(uint64(*opts.EncryptionOffset))) } // fetch host & contract info @@ -1009,7 +1010,7 @@ func (w *Worker) UploadMultipartUploadPart(ctx context.Context, r io.Reader, buc eTag, err := w.upload(ctx, bucket, path, up.RedundancySettings, r, contracts, uploadOpts...) 
if err != nil { w.logger.With(zap.Error(err)).With("path", path).With("bucket", bucket).Error("failed to upload object") - if !errors.Is(err, ErrShuttingDown) && !errors.Is(err, errUploadInterrupted) && !errors.Is(err, context.Canceled) { + if !errors.Is(err, ErrShuttingDown) && !errors.Is(err, upload.ErrUploadCancelled) && !errors.Is(err, context.Canceled) { w.registerAlert(newUploadFailedAlert(bucket, path, "", up.RedundancySettings.MinShards, up.RedundancySettings.TotalShards, len(contracts), up.UploadPacking, false, err)) } return nil, fmt.Errorf("couldn't upload object: %w", err) diff --git a/worker/worker_test.go b/worker/worker_test.go index 2f4e5069d..b7d1f309e 100644 --- a/worker/worker_test.go +++ b/worker/worker_test.go @@ -12,6 +12,7 @@ import ( "go.sia.tech/renterd/internal/download" "go.sia.tech/renterd/internal/test" "go.sia.tech/renterd/internal/test/mocks" + "go.sia.tech/renterd/internal/upload" "go.sia.tech/renterd/internal/utils" "go.uber.org/zap" "golang.org/x/crypto/blake2b" @@ -34,7 +35,7 @@ type ( } ) -func newTestWorker(t test.TestingCommon) *testWorker { +func newTestWorker(t test.TestingCommon, cfg config.Worker) *testWorker { // create bus dependencies cs := mocks.NewContractStore() os := mocks.NewObjectStore(testBucket, cs) @@ -46,7 +47,6 @@ func newTestWorker(t test.TestingCommon) *testWorker { ulmm := mocks.NewMemoryManager() // create worker - cfg := newTestWorkerCfg() mk := utils.MasterKey(blake2b.Sum256([]byte("testwork"))) w, err := New(cfg, mk, b, zap.NewNop()) if err != nil { @@ -57,8 +57,7 @@ func newTestWorker(t test.TestingCommon) *testWorker { hm := newTestHostManager(t) uploadKey := mk.DeriveUploadKey() w.downloadManager = download.NewManager(context.Background(), &uploadKey, hm, b, cfg.UploadMaxMemory, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, zap.NewNop()) - w.uploadManager.hm = hm - w.uploadManager.mm = ulmm + w.uploadManager = upload.NewManager(context.Background(), &uploadKey, hm, b, b, b, cfg.DownloadMaxMemory, cfg.DownloadMaxOverdrive, cfg.DownloadOverdriveTimeout, zap.NewNop()) return &testWorker{ test.NewTT(t), @@ -91,21 +90,21 @@ func (w *testWorker) BlockUploads() func() { return w.ulmm.Block() } -func (w *testWorker) BlockAsyncPackedSlabUploads(up uploadParameters) { +func (w *testWorker) BlockAsyncPackedSlabUploads(up upload.Parameters) { w.uploadsMu.Lock() defer w.uploadsMu.Unlock() - key := fmt.Sprintf("%d-%d", up.rs.MinShards, up.rs.TotalShards) + key := fmt.Sprintf("%d-%d", up.RS.MinShards, up.RS.TotalShards) w.uploadingPackedSlabs[key] = struct{}{} } -func (w *testWorker) UnblockAsyncPackedSlabUploads(up uploadParameters) { +func (w *testWorker) UnblockAsyncPackedSlabUploads(up upload.Parameters) { w.uploadsMu.Lock() defer w.uploadsMu.Unlock() - key := fmt.Sprintf("%d-%d", up.rs.MinShards, up.rs.TotalShards) + key := fmt.Sprintf("%d-%d", up.RS.MinShards, up.RS.TotalShards) delete(w.uploadingPackedSlabs, key) } -func (w *testWorker) Contracts() (hcs []hostContract) { +func (w *testWorker) UploadHosts() (hcs []upload.HostInfo) { hosts, err := w.hs.UsableHosts(context.Background()) if err != nil { w.tt.Fatal(err) @@ -121,7 +120,12 @@ func (w *testWorker) Contracts() (hcs []hostContract) { } for _, c := range contracts { if h, ok := hmap[c.HostKey]; ok { - hcs = append(hcs, hostContract{c, h}) + hcs = append(hcs, upload.HostInfo{ + HostInfo: h, + ContractEndHeight: c.WindowEnd, + ContractID: c.ID, + ContractRenewedFrom: c.RenewedFrom, + }) } } From 66dddf1104e0c3df37d8414b8f0460f6bd136c74 Mon Sep 17 00:00:00 2001 From: PJ 
Date: Mon, 9 Dec 2024 14:22:26 +0100 Subject: [PATCH 03/14] docs: add changelog file --- .changeset/move_upload_manager_to_internal_package.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/move_upload_manager_to_internal_package.md diff --git a/.changeset/move_upload_manager_to_internal_package.md b/.changeset/move_upload_manager_to_internal_package.md new file mode 100644 index 000000000..3e032eb87 --- /dev/null +++ b/.changeset/move_upload_manager_to_internal_package.md @@ -0,0 +1,5 @@ +--- +default: patch +--- + +# Move upload manager to internal package From 119609d2a34bdbb60b07cd545f8aa6d8419594e6 Mon Sep 17 00:00:00 2001 From: PJ Date: Mon, 9 Dec 2024 16:10:14 +0100 Subject: [PATCH 04/14] testing: fix upload tests --- internal/download/downloadmanager.go | 4 ++-- internal/upload/uploadmanager.go | 4 ++-- internal/upload/uploadmanager_test.go | 2 +- worker/upload_test.go | 4 ++-- worker/worker.go | 9 +++++++-- worker/worker_test.go | 4 ++-- 6 files changed, 16 insertions(+), 11 deletions(-) diff --git a/internal/download/downloadmanager.go b/internal/download/downloadmanager.go index 98a9e6cbc..f00cb4f0d 100644 --- a/internal/download/downloadmanager.go +++ b/internal/download/downloadmanager.go @@ -115,11 +115,11 @@ func (s *sectorInfo) selectHost(h types.PublicKey) { } } -func NewManager(ctx context.Context, uploadKey *utils.UploadKey, hm host.HostManager, os ObjectStore, maxMemory, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *Manager { +func NewManager(ctx context.Context, uploadKey *utils.UploadKey, hm host.HostManager, mm memory.MemoryManager, os ObjectStore, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *Manager { logger = logger.Named("downloadmanager") return &Manager{ hm: hm, - mm: memory.NewManager(maxMemory, logger), + mm: mm, os: os, uploadKey: uploadKey, logger: logger.Sugar(), diff --git a/internal/upload/uploadmanager.go b/internal/upload/uploadmanager.go index 979bebd7f..feca91886 100644 --- a/internal/upload/uploadmanager.go +++ b/internal/upload/uploadmanager.go @@ -150,11 +150,11 @@ type ( } ) -func NewManager(ctx context.Context, uploadKey *utils.UploadKey, hm host.HostManager, os ObjectStore, cl ContractLocker, cs uploader.ContractStore, maxMemory, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *Manager { +func NewManager(ctx context.Context, uploadKey *utils.UploadKey, hm host.HostManager, mm memory.MemoryManager, os ObjectStore, cl ContractLocker, cs uploader.ContractStore, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *Manager { logger = logger.Named("uploadmanager") return &Manager{ hm: hm, - mm: memory.NewManager(maxMemory, logger), + mm: mm, os: os, cl: cl, cs: cs, diff --git a/internal/upload/uploadmanager_test.go b/internal/upload/uploadmanager_test.go index cf6932df2..4447ec609 100644 --- a/internal/upload/uploadmanager_test.go +++ b/internal/upload/uploadmanager_test.go @@ -20,7 +20,7 @@ func (hm *hostManager) Host(hk types.PublicKey, fcid types.FileContractID, siamu func TestRefreshUploaders(t *testing.T) { hm := &hostManager{} - ul := NewManager(context.Background(), nil, hm, nil, nil, nil, 0, 0, 0, zap.NewNop()) + ul := NewManager(context.Background(), nil, hm, nil, nil, nil, nil, 0, 0, zap.NewNop()) // prepare host info hi := HostInfo{ diff --git a/worker/upload_test.go b/worker/upload_test.go index 198cefea0..846359fe8 100644 --- a/worker/upload_test.go +++ b/worker/upload_test.go @@ -152,7 +152,7 @@ func 
TestUploadPackedSlab(t *testing.T) { // create upload params params := testParameters(t.Name()) - opts := []upload.Option{upload.WithPacking(true)} + params.Packing = true // create test data data := frand.Bytes(128) @@ -231,7 +231,7 @@ func TestUploadPackedSlab(t *testing.T) { uploadBytes := func(n int) { t.Helper() params.Key = fmt.Sprintf("%s_%d", t.Name(), c) - _, err := w.upload(context.Background(), params.Bucket, params.Key, testRedundancySettings, bytes.NewReader(frand.Bytes(n)), w.UploadHosts(), opts...) + _, err := w.upload(context.Background(), params.Bucket, params.Key, testRedundancySettings, bytes.NewReader(frand.Bytes(n)), w.UploadHosts(), upload.WithPacking(true)) if err != nil { t.Fatal(err) } diff --git a/worker/worker.go b/worker/worker.go index 7beabd0f4..0e3cd2f88 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -26,6 +26,7 @@ import ( "go.sia.tech/renterd/config" "go.sia.tech/renterd/internal/download" "go.sia.tech/renterd/internal/gouging" + "go.sia.tech/renterd/internal/memory" "go.sia.tech/renterd/internal/prices" "go.sia.tech/renterd/internal/rhp" rhp2 "go.sia.tech/renterd/internal/rhp/v2" @@ -722,8 +723,12 @@ func New(cfg config.Worker, masterKey [32]byte, b Bus, l *zap.Logger) (*Worker, } uploadKey := w.masterKey.DeriveUploadKey() - w.downloadManager = download.NewManager(w.shutdownCtx, &uploadKey, w, w.bus, cfg.UploadMaxMemory, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) - w.uploadManager = upload.NewManager(w.shutdownCtx, &uploadKey, w, w.bus, w.bus, w.bus, cfg.UploadMaxMemory, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) + + dlmm := memory.NewManager(cfg.UploadMaxMemory, l.Named("uploadmanager")) + w.downloadManager = download.NewManager(w.shutdownCtx, &uploadKey, w, dlmm, w.bus, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) + + ulmm := memory.NewManager(cfg.UploadMaxMemory, l.Named("uploadmanager")) + w.uploadManager = upload.NewManager(w.shutdownCtx, &uploadKey, w, ulmm, w.bus, w.bus, w.bus, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) w.initContractSpendingRecorder(cfg.BusFlushInterval) return w, nil diff --git a/worker/worker_test.go b/worker/worker_test.go index b7d1f309e..7e634f1f4 100644 --- a/worker/worker_test.go +++ b/worker/worker_test.go @@ -56,8 +56,8 @@ func newTestWorker(t test.TestingCommon, cfg config.Worker) *testWorker { // override managers hm := newTestHostManager(t) uploadKey := mk.DeriveUploadKey() - w.downloadManager = download.NewManager(context.Background(), &uploadKey, hm, b, cfg.UploadMaxMemory, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, zap.NewNop()) - w.uploadManager = upload.NewManager(context.Background(), &uploadKey, hm, b, b, b, cfg.DownloadMaxMemory, cfg.DownloadMaxOverdrive, cfg.DownloadOverdriveTimeout, zap.NewNop()) + w.downloadManager = download.NewManager(context.Background(), &uploadKey, hm, dlmm, b, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, zap.NewNop()) + w.uploadManager = upload.NewManager(context.Background(), &uploadKey, hm, ulmm, b, b, b, cfg.DownloadMaxOverdrive, cfg.DownloadOverdriveTimeout, zap.NewNop()) return &testWorker{ test.NewTT(t), From 88a44555111ef79925dd85cfdaa4b202fe573f5c Mon Sep 17 00:00:00 2001 From: PJ Date: Mon, 9 Dec 2024 16:20:09 +0100 Subject: [PATCH 05/14] testing: fix parameter order --- worker/worker_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/worker/worker_test.go b/worker/worker_test.go index 7e634f1f4..0c731766b 100644 --- a/worker/worker_test.go +++ b/worker/worker_test.go @@ -56,8 
+56,8 @@ func newTestWorker(t test.TestingCommon, cfg config.Worker) *testWorker { // override managers hm := newTestHostManager(t) uploadKey := mk.DeriveUploadKey() - w.downloadManager = download.NewManager(context.Background(), &uploadKey, hm, dlmm, b, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, zap.NewNop()) - w.uploadManager = upload.NewManager(context.Background(), &uploadKey, hm, ulmm, b, b, b, cfg.DownloadMaxOverdrive, cfg.DownloadOverdriveTimeout, zap.NewNop()) + w.downloadManager = download.NewManager(context.Background(), &uploadKey, hm, dlmm, b, cfg.DownloadMaxOverdrive, cfg.DownloadOverdriveTimeout, zap.NewNop()) + w.uploadManager = upload.NewManager(context.Background(), &uploadKey, hm, ulmm, b, b, b, cfg.UploadMaxMemory, cfg.UploadOverdriveTimeout, zap.NewNop()) return &testWorker{ test.NewTT(t), From 066a6c47bae7b963abd1b457245bd876a6e31f66 Mon Sep 17 00:00:00 2001 From: PJ Date: Tue, 10 Dec 2024 12:39:58 +0100 Subject: [PATCH 06/14] cluster: move migrations from worker to autopilot --- README.md | 18 +- autopilot/alerts.go | 33 +- autopilot/autopilot.go | 215 +++++----- autopilot/contractor/contractor.go | 5 + autopilot/migrator.go | 262 ------------ autopilot/migrator/accounts.go | 110 ++++++ autopilot/migrator/alerts.go | 85 ++++ autopilot/migrator/hosts.go | 369 +++++++++++++++++ autopilot/migrator/migrator.go | 372 ++++++++++++++++++ autopilot/migrator/spending.go | 121 ++++++ .../migrator/worker.go | 94 ++++- autopilot/workerpool.go | 48 --- cmd/renterd/config.go | 58 ++- cmd/renterd/node.go | 80 ++-- config/config.go | 57 +-- internal/{worker => accounts}/accounts.go | 48 +-- .../{worker => accounts}/accounts_test.go | 6 +- .../gouging.go => internal/gouging/context.go | 15 +- internal/test/e2e/cluster.go | 30 +- {worker => internal/utils}/reader.go | 2 +- stores/sql/types.go | 2 +- webhooks/webhooks.go | 3 - worker/alerts.go | 46 --- worker/client/client.go | 2 +- worker/host.go | 17 +- worker/upload.go | 3 +- worker/worker.go | 77 +--- 27 files changed, 1446 insertions(+), 732 deletions(-) delete mode 100644 autopilot/migrator.go create mode 100644 autopilot/migrator/accounts.go create mode 100644 autopilot/migrator/alerts.go create mode 100644 autopilot/migrator/hosts.go create mode 100644 autopilot/migrator/migrator.go create mode 100644 autopilot/migrator/spending.go rename worker/migrations.go => autopilot/migrator/worker.go (52%) delete mode 100644 autopilot/workerpool.go rename internal/{worker => accounts}/accounts.go (92%) rename internal/{worker => accounts}/accounts_test.go (95%) rename worker/gouging.go => internal/gouging/context.go (51%) rename {worker => internal/utils}/reader.go (96%) diff --git a/README.md b/README.md index 26a6c8f5d..09f783319 100644 --- a/README.md +++ b/README.md @@ -79,17 +79,19 @@ overview of all settings configurable through the CLI. 
| `Worker.UploadOverdriveTimeout` | Timeout for overdriving slab uploads | `3s` | `--worker.uploadOverdriveTimeout` | - | `worker.uploadOverdriveTimeout` | | `Worker.Enabled` | Enables/disables worker | `true` | `--worker.enabled` | `RENTERD_WORKER_ENABLED` | `worker.enabled` | | `Worker.AllowUnauthenticatedDownloads` | Allows unauthenticated downloads | - | `--worker.unauthenticatedDownloads` | `RENTERD_WORKER_UNAUTHENTICATED_DOWNLOADS` | `worker.allowUnauthenticatedDownloads` | -| `Worker.RemoteAddrs` | List of remote worker addresses (semicolon delimited) | - | - | `RENTERD_WORKER_REMOTE_ADDRS` | `worker.remotes` | -| `Worker.RemotePassword` | API password for the remote workers | - | - | `RENTERD_WORKER_API_PASSWORD` | `worker.remotes` | | `Autopilot.Enabled` | Enables/disables autopilot | `true` | `--autopilot.enabled` | `RENTERD_AUTOPILOT_ENABLED` | `autopilot.enabled` | | `Autopilot.AccountsRefillInterval` | Interval for refilling workers' account balances | `24h` | `--autopilot.accountRefillInterval` | - | `autopilot.accountsRefillInterval` | | `Autopilot.Heartbeat` | Interval for autopilot loop execution | `30m` | `--autopilot.heartbeat` | - | `autopilot.heartbeat` | -| `Autopilot.MigrationHealthCutoff` | Threshold for migrating slabs based on health | `0.75` | `--autopilot.migrationHealthCutoff` | - | `autopilot.migrationHealthCutoff` | | `Autopilot.RevisionBroadcastInterval`| Interval for broadcasting contract revisions | `168h` (7 days) | `--autopilot.revisionBroadcastInterval` | `RENTERD_AUTOPILOT_REVISION_BROADCAST_INTERVAL` | `autopilot.revisionBroadcastInterval` | | `Autopilot.ScannerBatchSize` | Batch size for host scanning | `1000` | `--autopilot.scannerBatchSize` | - | `autopilot.scannerBatchSize` | | `Autopilot.ScannerInterval` | Interval for scanning hosts | `24h` | `--autopilot.scannerInterval` | - | `autopilot.scannerInterval` | | `Autopilot.ScannerNumThreads` | Number of threads for scanning hosts | `100` | - | - | `autopilot.scannerNumThreads` | -| `Autopilot.MigratorParallelSlabsPerWorker` | Parallel slab migrations per worker | `1` | `--autopilot.migratorParallelSlabsPerWorker` | `RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER` | `autopilot.migratorParallelSlabsPerWorker` | +| `Migrator.HealthCutoff` | Threshold for migrating slabs based on health | `0.75` | `--migrator.healthCutoff` | - | `migrator.HealthCutoff` | +| `Migrator.ParallelSlabsPerWorker` | Parallel slab migrations | `1` | `--migrator.parallelSlabsPerWorker` | `RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER` | `migrator.parallelSlabsPerWorker` | +| `Migrator.DownloadMaxOverdrive` | Max overdrive workers for migration downloads | `5` | `--migrator.downloadMaxOverdrive` | - | `migrator.downloadMaxOverdrive` | +| `Migrator.DownloadOverdriveTimeout` | Timeout for overdriving migration downloads | `3s` | `--migrator.downloadOverdriveTimeout` | - | `migrator.downloadOverdriveTimeout` | +| `Migrator.UploadMaxOverdrive` | Max overdrive workers for migration uploads | `5` | `--migrator.uploadMaxOverdrive` | - | `migrator.uploadMaxOverdrive` | +| `Migrator.UploadOverdriveTimeout` | Timeout for overdriving migration uploads | `3s` | `--migrator.uploadOverdriveTimeout` | - | `migrator.uploadOverdriveTimeout` | | `S3.Address` | Address for serving S3 API | `:9982` | `--s3.address` | `RENTERD_S3_ADDRESS` | `s3.address` | | `S3.DisableAuth` | Disables authentication for S3 API | `false` | `--s3.disableAuth` | `RENTERD_S3_DISABLE_AUTH` | `s3.disableAuth` | | `S3.Enabled` | Enables/disables S3 API | `true` | `--s3.enabled` | 
`RENTERD_S3_ENABLED` | `s3.enabled` | @@ -135,11 +137,7 @@ occur. Therefor it is important to start the worker after the bus is reachable. To run the autopilot separately, the worker has to be disabled using the `--worker.enabled` flag. Similar to the worker, the autopilot has to be -configured with a remote bus for the node not to start a bus itself. Alongside -with knowing where the bus is located, the autopilot also has to be aware of the -workers. These remote workers can be configured through yaml under the option -`worker.remotes`, or through environment variables -(`RENTERD_WORKER_REMOTE_ADDRS` and `RENTERD_WORKER_API_PASSWORD`). +configured with a remote bus for the node not to start a bus itself. #### Example docker-compose with minimal configuration @@ -193,8 +191,6 @@ services: - RENTERD_API_PASSWORD=autopilot-pass - RENTERD_BUS_API_PASSWORD=bus-pass - RENTERD_BUS_REMOTE_ADDR=http://bus:9980/api/bus - - RENTERD_WORKER_API_PASSWORD= - - RENTERD_WORKER_REMOTE_ADDRS=http://worker-1:9980/api/worker;http://worker-2:9980/api/worker ports: - "9984:9980" depends_on: diff --git a/autopilot/alerts.go b/autopilot/alerts.go index c30740fcb..0a5c6e7be 100644 --- a/autopilot/alerts.go +++ b/autopilot/alerts.go @@ -10,10 +10,8 @@ import ( ) var ( - alertHealthRefreshID = alerts.RandomAlertID() // constant until restarted - alertLowBalanceID = alerts.RandomAlertID() // constant until restarted - alertOngoingMigrationsID = alerts.RandomAlertID() // constant until restarted - alertPruningID = alerts.RandomAlertID() // constant until restarted + alertLowBalanceID = alerts.RandomAlertID() // constant until restarted + alertPruningID = alerts.RandomAlertID() // constant until restarted ) func (ap *Autopilot) RegisterAlert(ctx context.Context, a alerts.Alert) { @@ -58,30 +56,3 @@ func newContractPruningFailedAlert(hk types.PublicKey, version, release string, Timestamp: time.Now(), } } - -func newOngoingMigrationsAlert(n int, estimate time.Duration) alerts.Alert { - data := make(map[string]interface{}) - if rounded := estimate.Round(time.Minute); rounded > 0 { - data["estimate"] = fmt.Sprintf("~%v remaining", rounded) - } - - return alerts.Alert{ - ID: alertOngoingMigrationsID, - Severity: alerts.SeverityInfo, - Message: fmt.Sprintf("Migrating %d slabs", n), - Timestamp: time.Now(), - Data: data, - } -} - -func newRefreshHealthFailedAlert(err error) alerts.Alert { - return alerts.Alert{ - ID: alertHealthRefreshID, - Severity: alerts.SeverityCritical, - Message: "Health refresh failed", - Data: map[string]interface{}{ - "error": err.Error(), - }, - Timestamp: time.Now(), - } -} diff --git a/autopilot/autopilot.go b/autopilot/autopilot.go index 8e34c3e53..5af46822c 100644 --- a/autopilot/autopilot.go +++ b/autopilot/autopilot.go @@ -9,6 +9,8 @@ import ( "sync" "time" + rhpv3 "go.sia.tech/core/rhp/v3" + "go.sia.tech/core/consensus" "go.sia.tech/core/types" "go.sia.tech/coreutils/wallet" @@ -16,6 +18,7 @@ import ( "go.sia.tech/renterd/alerts" "go.sia.tech/renterd/api" "go.sia.tech/renterd/autopilot/contractor" + "go.sia.tech/renterd/autopilot/migrator" "go.sia.tech/renterd/autopilot/scanner" "go.sia.tech/renterd/build" "go.sia.tech/renterd/config" @@ -31,6 +34,8 @@ type Bus interface { // accounts Accounts(ctx context.Context, owner string) (accounts []api.Account, err error) + FundAccount(ctx context.Context, account rhpv3.Account, fcid types.FileContractID, amount types.Currency) (types.Currency, error) + UpdateAccounts(context.Context, []api.Account) error // autopilot AutopilotConfig(ctx 
context.Context) (api.AutopilotConfig, error) @@ -49,6 +54,8 @@ type Bus interface { FormContract(ctx context.Context, renterAddress types.Address, renterFunds types.Currency, hostKey types.PublicKey, hostCollateral types.Currency, endHeight uint64) (api.ContractMetadata, error) ContractRevision(ctx context.Context, fcid types.FileContractID) (api.Revision, error) RenewContract(ctx context.Context, fcid types.FileContractID, endHeight uint64, renterFunds, minNewCollateral types.Currency, expectedNewStorage uint64) (api.ContractMetadata, error) + RecordContractSpending(ctx context.Context, records []api.ContractSpendingRecord) error + RenewedContract(ctx context.Context, renewedFrom types.FileContractID) (api.ContractMetadata, error) UpdateContractUsability(ctx context.Context, contractID types.FileContractID, usability string) (err error) PrunableData(ctx context.Context) (prunableData api.ContractsPrunableDataResponse, err error) PruneContract(ctx context.Context, id types.FileContractID, timeout time.Duration) (api.ContractPruneResponse, error) @@ -65,16 +72,36 @@ type Bus interface { // buckets ListBuckets(ctx context.Context) ([]api.Bucket, error) + // migrations + UploadParams(ctx context.Context) (api.UploadParams, error) + UsableHosts(ctx context.Context) (hosts []api.HostInfo, err error) + AddMultipartPart(ctx context.Context, bucket, key, ETag, uploadID string, partNumber int, slices []object.SlabSlice) (err error) + AddObject(ctx context.Context, bucket, key string, o object.Object, opts api.AddObjectOptions) error + AddPartialSlab(ctx context.Context, data []byte, minShards, totalShards uint8) (slabs []object.SlabSlice, slabBufferMaxSizeSoftReached bool, err error) + AddUploadingSectors(ctx context.Context, uID api.UploadID, root []types.Hash256) error + FinishUpload(ctx context.Context, uID api.UploadID) error + MarkPackedSlabsUploaded(ctx context.Context, slabs []api.UploadedPackedSlab) error + TrackUpload(ctx context.Context, uID api.UploadID) error + UpdateSlab(ctx context.Context, key object.EncryptionKey, sectors []api.UploadedSector) error + + // locker + AcquireContract(ctx context.Context, fcid types.FileContractID, priority int, d time.Duration) (lockID uint64, err error) + KeepaliveContract(ctx context.Context, fcid types.FileContractID, lockID uint64, d time.Duration) (err error) + ReleaseContract(ctx context.Context, fcid types.FileContractID, lockID uint64) (err error) + // objects Objects(ctx context.Context, prefix string, opts api.ListObjectOptions) (resp api.ObjectsResponse, err error) RefreshHealth(ctx context.Context) error Slab(ctx context.Context, key object.EncryptionKey) (object.Slab, error) SlabsForMigration(ctx context.Context, healthCutoff float64, limit int) ([]api.UnhealthySlab, error) + DeleteHostSector(ctx context.Context, hk types.PublicKey, root types.Hash256) error + FetchPartialSlab(ctx context.Context, key object.EncryptionKey, offset, length uint32) ([]byte, error) // scanner ScanHost(ctx context.Context, hostKey types.PublicKey, timeout time.Duration) (resp api.HostScanResponse, err error) // settings + GougingParams(ctx context.Context) (api.GougingParams, error) GougingSettings(ctx context.Context) (gs api.GougingSettings, err error) UploadSettings(ctx context.Context) (us api.UploadSettings, err error) @@ -92,13 +119,12 @@ type Bus interface { } type Autopilot struct { - alerts alerts.Alerter - bus Bus - logger *zap.SugaredLogger - workers *workerPool + alerts alerts.Alerter + bus Bus + logger *zap.SugaredLogger c *contractor.Contractor - 
m *migrator + m migrator.Migrator s scanner.Scanner tickerDuration time.Duration @@ -120,14 +146,13 @@ type Autopilot struct { } // New initializes an Autopilot. -func New(cfg config.Autopilot, bus Bus, workers []Worker, logger *zap.Logger) (_ *Autopilot, err error) { +func New(cfg config.Autopilot, mCfg config.Migrator, masterKey utils.MasterKey, bus Bus, logger *zap.Logger) (_ *Autopilot, err error) { logger = logger.Named("autopilot") shutdownCtx, shutdownCtxCancel := context.WithCancel(context.Background()) ap := &Autopilot{ - alerts: alerts.WithOrigin(bus, "autopilot"), - bus: bus, - logger: logger.Sugar(), - workers: newWorkerPool(workers), + alerts: alerts.WithOrigin(bus, "autopilot"), + bus: bus, + logger: logger.Sugar(), shutdownCtx: shutdownCtx, shutdownCtxCancel: shutdownCtxCancel, @@ -143,7 +168,10 @@ func New(cfg config.Autopilot, bus Bus, workers []Worker, logger *zap.Logger) (_ } ap.c = contractor.New(bus, bus, cfg.RevisionSubmissionBuffer, cfg.RevisionBroadcastInterval, cfg.AllowRedundantHostIPs, ap.logger) - ap.m = newMigrator(ap, cfg.MigrationHealthCutoff, cfg.MigratorParallelSlabsPerWorker) + ap.m, err = migrator.New(ap.shutdownCtx, mCfg, masterKey, ap.alerts, ap.bus, ap.bus, logger) + if err != nil { + return + } return ap, nil } @@ -225,88 +253,7 @@ func (ap *Autopilot) Run() { for !ap.isStopped() { ap.logger.Info("autopilot iteration starting") tickerFired := make(chan struct{}) - ap.workers.withWorker(func(w Worker) { - defer ap.logger.Info("autopilot iteration ended") - - // initiate a host scan - no need to be synced or configured for scanning - ap.s.Scan(ap.shutdownCtx, ap.bus, forceScan) - - // reset forceScans - forceScan = false - - // block until consensus is synced - if synced, blocked, interrupted := ap.blockUntilSynced(ap.ticker.C); !synced { - if interrupted { - close(tickerFired) - return - } - ap.logger.Info("autopilot stopped before consensus was synced") - return - } else if blocked { - if scanning, _ := ap.s.Status(); !scanning { - ap.s.Scan(ap.shutdownCtx, ap.bus, true) - } - } - - // block until the autopilot is enabled - if enabled, interrupted := ap.blockUntilEnabled(ap.ticker.C); !enabled { - if interrupted { - close(tickerFired) - return - } - ap.logger.Info("autopilot stopped before it was able to confirm it was enabled in the bus") - return - } - - // fetch autopilot config - apCfg, err := ap.bus.AutopilotConfig(ap.shutdownCtx) - if err != nil { - ap.logger.Errorf("aborting maintenance, failed to fetch autopilot", zap.Error(err)) - return - } - - // update the scanner with the hosts config - ap.s.UpdateHostsConfig(apCfg.Hosts) - - // perform wallet maintenance - err = ap.performWalletMaintenance(ap.shutdownCtx) - if err != nil { - ap.logger.Errorf("wallet maintenance failed, err: %v", err) - } - - // build maintenance state - buildState, err := ap.buildState(ap.shutdownCtx) - if err != nil { - ap.logger.Errorf("aborting maintenance, failed to build state, err: %v", err) - return - } - - // perform maintenance - setChanged, err := ap.c.PerformContractMaintenance(ap.shutdownCtx, buildState) - if err != nil && utils.IsErr(err, context.Canceled) { - return - } else if err != nil { - ap.logger.Errorf("contract maintenance failed, err: %v", err) - } - maintenanceSuccess := err == nil - - // upon success, notify the migrator. The health of slabs might have - // changed. 
- if maintenanceSuccess && setChanged { - ap.m.SignalMaintenanceFinished() - } - - // migration - ap.m.tryPerformMigrations(ap.workers) - - // pruning - if apCfg.Contracts.Prune { - ap.tryPerformPruning() - } else { - ap.logger.Info("pruning disabled") - } - }) - + ap.tick(forceScan, tickerFired) select { case <-ap.shutdownCtx.Done(): return @@ -459,6 +406,88 @@ func (ap *Autopilot) blockUntilSynced(interrupt <-chan time.Time) (synced, block } } +func (ap *Autopilot) tick(forceScan bool, tickerFired chan struct{}) { + defer ap.logger.Info("autopilot iteration ended") + + // initiate a host scan - no need to be synced or configured for scanning + ap.s.Scan(ap.shutdownCtx, ap.bus, forceScan) + + // reset forceScans + forceScan = false + + // block until consensus is synced + if synced, blocked, interrupted := ap.blockUntilSynced(ap.ticker.C); !synced { + if interrupted { + close(tickerFired) + return + } + ap.logger.Info("autopilot stopped before consensus was synced") + return + } else if blocked { + if scanning, _ := ap.s.Status(); !scanning { + ap.s.Scan(ap.shutdownCtx, ap.bus, true) + } + } + + // block until the autopilot is enabled + if enabled, interrupted := ap.blockUntilEnabled(ap.ticker.C); !enabled { + if interrupted { + close(tickerFired) + return + } + ap.logger.Info("autopilot stopped before it was able to confirm it was enabled in the bus") + return + } + + // fetch autopilot config + apCfg, err := ap.bus.AutopilotConfig(ap.shutdownCtx) + if err != nil { + ap.logger.Errorf("aborting maintenance, failed to fetch autopilot", zap.Error(err)) + return + } + + // update the scanner with the hosts config + ap.s.UpdateHostsConfig(apCfg.Hosts) + + // perform wallet maintenance + err = ap.performWalletMaintenance(ap.shutdownCtx) + if err != nil { + ap.logger.Errorf("wallet maintenance failed, err: %v", err) + } + + // build maintenance state + buildState, err := ap.buildState(ap.shutdownCtx) + if err != nil { + ap.logger.Errorf("aborting maintenance, failed to build state, err: %v", err) + return + } + + // perform maintenance + setChanged, err := ap.c.PerformContractMaintenance(ap.shutdownCtx, buildState) + if err != nil && utils.IsErr(err, context.Canceled) { + return + } else if err != nil { + ap.logger.Errorf("contract maintenance failed, err: %v", err) + } + maintenanceSuccess := err == nil + + // upon success, notify the migrator. The health of slabs might have + // changed. + if maintenanceSuccess && setChanged { + ap.m.SignalMaintenanceFinished() + } + + // migration + ap.m.Migrate(ap.shutdownCtx) + + // pruning + if apCfg.Contracts.Prune { + ap.tryPerformPruning() + } else { + ap.logger.Info("pruning disabled") + } +} + func (ap *Autopilot) tryScheduleTriggerWhenFunded() error { // apply sane timeout ctx, cancel := context.WithTimeout(ap.shutdownCtx, time.Minute) diff --git a/autopilot/contractor/contractor.go b/autopilot/contractor/contractor.go index e8c1dbd26..3bd3dab88 100644 --- a/autopilot/contractor/contractor.go +++ b/autopilot/contractor/contractor.go @@ -497,6 +497,11 @@ func activeContracts(ctx context.Context, bus Bus, logger *zap.SugaredLogger) ([ } wg.Wait() + logger. + With("elapsed", time.Since(start)). + With("contracts", len(contracts)). 
+ Info("done fetching all revisions") + return contracts, nil } diff --git a/autopilot/migrator.go b/autopilot/migrator.go deleted file mode 100644 index e13076111..000000000 --- a/autopilot/migrator.go +++ /dev/null @@ -1,262 +0,0 @@ -package autopilot - -import ( - "context" - "fmt" - "math" - "sort" - "sync" - "time" - - "go.sia.tech/renterd/api" - "go.sia.tech/renterd/internal/utils" - "go.sia.tech/renterd/object" - "go.uber.org/zap" -) - -const ( - migratorBatchSize = math.MaxInt // TODO: change once we have a fix for the infinite loop - - // migrationAlertRegisterInterval is the interval at which we update the - // ongoing migrations alert to indicate progress - migrationAlertRegisterInterval = 30 * time.Second -) - -type ( - migrator struct { - ap *Autopilot - logger *zap.SugaredLogger - healthCutoff float64 - parallelSlabsPerWorker uint64 - signalConsensusNotSynced chan struct{} - signalMaintenanceFinished chan struct{} - statsSlabMigrationSpeedMS *utils.DataPoints - - mu sync.Mutex - migrating bool - migratingLastStart time.Time - } - - job struct { - api.UnhealthySlab - slabIdx int - batchSize int - - b Bus - } -) - -func (j *job) execute(ctx context.Context, w Worker) (time.Duration, error) { - start := time.Now() - slab, err := j.b.Slab(ctx, j.EncryptionKey) - if err != nil { - return 0, fmt.Errorf("failed to fetch slab; %w", err) - } - - err = w.MigrateSlab(ctx, slab) - return time.Since(start), err -} - -func newMigrator(ap *Autopilot, healthCutoff float64, parallelSlabsPerWorker uint64) *migrator { - return &migrator{ - ap: ap, - logger: ap.logger.Named("migrator"), - healthCutoff: healthCutoff, - parallelSlabsPerWorker: parallelSlabsPerWorker, - signalConsensusNotSynced: make(chan struct{}, 1), - signalMaintenanceFinished: make(chan struct{}, 1), - statsSlabMigrationSpeedMS: utils.NewDataPoints(time.Hour), - } -} - -func (m *migrator) SignalMaintenanceFinished() { - select { - case m.signalMaintenanceFinished <- struct{}{}: - default: - } -} - -func (m *migrator) Status() (bool, time.Time) { - m.mu.Lock() - defer m.mu.Unlock() - return m.migrating, m.migratingLastStart -} - -func (m *migrator) slabMigrationEstimate(remaining int) time.Duration { - // recompute p90 - m.statsSlabMigrationSpeedMS.Recompute() - - // return 0 if p90 is 0 (can happen if we haven't collected enough data points) - p90 := m.statsSlabMigrationSpeedMS.P90() - if p90 == 0 { - return 0 - } - - totalNumMS := float64(remaining) * p90 / float64(m.parallelSlabsPerWorker) - return time.Duration(totalNumMS) * time.Millisecond -} - -func (m *migrator) tryPerformMigrations(wp *workerPool) { - m.mu.Lock() - if m.migrating || m.ap.isStopped() { - m.mu.Unlock() - return - } - m.migrating = true - m.migratingLastStart = time.Now() - m.mu.Unlock() - - m.ap.wg.Add(1) - go func() { - defer m.ap.wg.Done() - m.performMigrations(wp) - m.mu.Lock() - m.migrating = false - m.mu.Unlock() - }() -} - -func (m *migrator) performMigrations(p *workerPool) { - m.logger.Info("performing migrations") - b := m.ap.bus - - // prepare a channel to push work to the workers - jobs := make(chan job) - var wg sync.WaitGroup - defer func() { - close(jobs) - wg.Wait() - }() - - // launch workers - p.withWorkers(func(workers []Worker) { - for _, w := range workers { - for i := uint64(0); i < m.parallelSlabsPerWorker; i++ { - wg.Add(1) - go func(w Worker) { - defer wg.Done() - - // derive ctx from shutdown ctx - ctx, cancel := context.WithCancel(m.ap.shutdownCtx) - defer cancel() - - // process jobs - for j := range jobs { - duration, err := 
j.execute(ctx, w) - m.statsSlabMigrationSpeedMS.Track(float64(duration.Milliseconds())) - if utils.IsErr(err, api.ErrConsensusNotSynced) { - // interrupt migrations if consensus is not synced - select { - case m.signalConsensusNotSynced <- struct{}{}: - default: - } - return - } else if err != nil { - m.logger.Errorw("migration failed", - zap.Float64("health", j.Health), - zap.Stringer("slab", j.EncryptionKey)) - } - } - }(w) - } - } - }) - var toMigrate []api.UnhealthySlab - - // ignore a potential signal before the first iteration of the 'OUTER' loop - select { - case <-m.signalMaintenanceFinished: - default: - } - - // helper to update 'toMigrate' - updateToMigrate := func() { - // fetch slabs for migration - toMigrateNew, err := b.SlabsForMigration(m.ap.shutdownCtx, m.healthCutoff, migratorBatchSize) - if err != nil { - m.logger.Errorf("failed to fetch slabs for migration, err: %v", err) - return - } - m.logger.Infof("%d potential slabs fetched for migration", len(toMigrateNew)) - - // merge toMigrateNew with toMigrate - // NOTE: when merging, we remove all slabs from toMigrate that don't - // require migration anymore. However, slabs that have been in toMigrate - // before will be repaired before any new slabs. This is to prevent - // starvation. - migrateNewMap := make(map[object.EncryptionKey]*api.UnhealthySlab) - for i, slab := range toMigrateNew { - migrateNewMap[slab.EncryptionKey] = &toMigrateNew[i] - } - removed := 0 - for i := 0; i < len(toMigrate)-removed; { - slab := toMigrate[i] - if _, exists := migrateNewMap[slab.EncryptionKey]; exists { - delete(migrateNewMap, slab.EncryptionKey) // delete from map to leave only new slabs - i++ - } else { - toMigrate[i] = toMigrate[len(toMigrate)-1-removed] - removed++ - } - } - toMigrate = toMigrate[:len(toMigrate)-removed] - for _, slab := range migrateNewMap { - toMigrate = append(toMigrate, *slab) - } - - // sort the newly added slabs by health - newSlabs := toMigrate[len(toMigrate)-len(migrateNewMap):] - sort.Slice(newSlabs, func(i, j int) bool { - return newSlabs[i].Health < newSlabs[j].Health - }) - } - - // unregister the ongoing migrations alert when we're done - defer m.ap.alerts.DismissAlerts(m.ap.shutdownCtx, alertOngoingMigrationsID) - -OUTER: - for { - // recompute health. 
- start := time.Now() - if err := b.RefreshHealth(m.ap.shutdownCtx); err != nil { - m.ap.RegisterAlert(m.ap.shutdownCtx, newRefreshHealthFailedAlert(err)) - m.logger.Errorf("failed to recompute cached health before migration: %v", err) - } else { - m.ap.DismissAlert(m.ap.shutdownCtx, alertHealthRefreshID) - m.logger.Infof("recomputed slab health in %v", time.Since(start)) - updateToMigrate() - } - - // log the updated list of slabs to migrate - m.logger.Infof("%d slabs to migrate", len(toMigrate)) - - // return if there are no slabs to migrate - if len(toMigrate) == 0 { - return - } - - var lastRegister time.Time - for i, slab := range toMigrate { - if time.Since(lastRegister) > migrationAlertRegisterInterval { - // register an alert to notify users about ongoing migrations - remaining := len(toMigrate) - i - m.ap.RegisterAlert(m.ap.shutdownCtx, newOngoingMigrationsAlert(remaining, m.slabMigrationEstimate(remaining))) - lastRegister = time.Now() - } - select { - case <-m.ap.shutdownCtx.Done(): - return - case <-m.signalConsensusNotSynced: - m.logger.Info("migrations interrupted - consensus is not synced") - return - case <-m.signalMaintenanceFinished: - m.logger.Info("migrations interrupted - updating slabs for migration") - continue OUTER - case jobs <- job{slab, i, len(toMigrate), b}: - } - } - - // all slabs migrated - return - } -} diff --git a/autopilot/migrator/accounts.go b/autopilot/migrator/accounts.go new file mode 100644 index 000000000..3db639189 --- /dev/null +++ b/autopilot/migrator/accounts.go @@ -0,0 +1,110 @@ +package migrator + +import ( + "context" + "fmt" + "time" + + rhpv4 "go.sia.tech/core/rhp/v4" + "go.sia.tech/core/types" + "go.sia.tech/renterd/api" + "go.sia.tech/renterd/internal/gouging" + "go.sia.tech/renterd/internal/locking" + rhp3 "go.sia.tech/renterd/internal/rhp/v3" +) + +const ( + defaultRevisionFetchTimeout = 30 * time.Second + + lockingPrioritySyncing = 30 +) + +func (m *migrator) FundAccount(ctx context.Context, fcid types.FileContractID, hk types.PublicKey, desired types.Currency) error { + // calculate the deposit amount + acc := m.accounts.ForHost(hk) + return acc.WithDeposit(func(balance types.Currency) (types.Currency, error) { + // return early if we have the desired balance + if balance.Cmp(desired) >= 0 { + return types.ZeroCurrency, nil + } + deposit := desired.Sub(balance) + + // fund the account + var err error + deposit, err = m.bus.FundAccount(ctx, acc.ID(), fcid, desired.Sub(balance)) + if err != nil { + if rhp3.IsBalanceMaxExceeded(err) { + acc.ScheduleSync() + } + return types.ZeroCurrency, fmt.Errorf("failed to fund account with %v; %w", deposit, err) + } + + // log the account balance after funding + m.logger.Debugw("fund account succeeded", + "balance", balance.ExactString(), + "deposit", deposit.ExactString(), + ) + return deposit, nil + }) +} + +func (m *migrator) SyncAccount(ctx context.Context, fcid types.FileContractID, host api.HostInfo) error { + // handle v2 host + if host.IsV2() { + account := m.accounts.ForHost(host.PublicKey) + return account.WithSync(func() (types.Currency, error) { + return m.rhp4Client.AccountBalance(ctx, host.PublicKey, host.V2SiamuxAddr(), rhpv4.Account(account.ID())) + }) + } + + // attach gouging checker + gp, err := m.bus.GougingParams(ctx) + if err != nil { + return fmt.Errorf("couldn't get gouging parameters; %w", err) + } + ctx = gouging.WithChecker(ctx, m.bus, gp) + + // sync the account + h := m.Host(host.PublicKey, fcid, host.SiamuxAddr) + err = m.withRevision(ctx, fcid, host.PublicKey, 
host.SiamuxAddr, defaultRevisionFetchTimeout, lockingPrioritySyncing, func(rev types.FileContractRevision) error { + return h.SyncAccount(ctx, &rev) + }) + if err != nil { + return fmt.Errorf("failed to sync account; %w", err) + } + return nil +} + +func (m *migrator) withRevision(ctx context.Context, fcid types.FileContractID, hk types.PublicKey, siamuxAddr string, fetchTimeout time.Duration, lockPriority int, fn func(rev types.FileContractRevision) error) error { + return m.withContractLock(ctx, fcid, lockPriority, func() error { + if fetchTimeout > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, fetchTimeout) + defer cancel() + } + + rev, err := m.Host(hk, fcid, siamuxAddr).FetchRevision(ctx, fcid) + if err != nil { + return err + } + return fn(rev) + }) +} + +func (m *migrator) withContractLock(ctx context.Context, fcid types.FileContractID, priority int, fn func() error) error { + contractLock, err := m.acquireContractLock(ctx, fcid, priority) + if err != nil { + return err + } + defer func() { + releaseCtx, cancel := context.WithTimeout(m.shutdownCtx, 10*time.Second) + _ = contractLock.Release(releaseCtx) + cancel() + }() + + return fn() +} + +func (m *migrator) acquireContractLock(ctx context.Context, fcid types.FileContractID, priority int) (_ *locking.ContractLock, err error) { + return locking.NewContractLock(ctx, fcid, priority, m.bus, m.logger) +} diff --git a/autopilot/migrator/alerts.go b/autopilot/migrator/alerts.go new file mode 100644 index 000000000..f362cecdb --- /dev/null +++ b/autopilot/migrator/alerts.go @@ -0,0 +1,85 @@ +package migrator + +import ( + "errors" + "fmt" + "time" + + "go.sia.tech/renterd/alerts" + "go.sia.tech/renterd/api" + "go.sia.tech/renterd/internal/utils" + "go.sia.tech/renterd/object" +) + +var ( + alertHealthRefreshID = alerts.RandomAlertID() // constant until restarted + alertMigrationID = alerts.RandomAlertID() // constant until restarted + alertOngoingMigrationsID = alerts.RandomAlertID() // constant until restarted +) + +func newMigrationFailedAlert(slabKey object.EncryptionKey, health float64, objects []api.ObjectMetadata, err error) alerts.Alert { + data := map[string]interface{}{ + "error": err.Error(), + "health": health, + "slabKey": slabKey.String(), + "hint": "Migration failures can be temporary, but if they persist it can eventually lead to data loss and should therefor be taken very seriously.", + } + + if len(objects) > 0 { + data["objects"] = objects + } + + hostErr := err + for errors.Unwrap(hostErr) != nil { + hostErr = errors.Unwrap(hostErr) + } + if set, ok := hostErr.(utils.HostErrorSet); ok { + hostErrors := make(map[string]string, len(set)) + for hk, err := range set { + hostErrors[hk.String()] = err.Error() + } + data["hosts"] = hostErrors + } + + severity := alerts.SeverityError + if health < 0.25 { + severity = alerts.SeverityCritical + } else if health < 0.5 { + severity = alerts.SeverityWarning + } + + return alerts.Alert{ + ID: alerts.IDForSlab(alertMigrationID, slabKey), + Severity: severity, + Message: "Slab migration failed", + Data: data, + Timestamp: time.Now(), + } +} + +func newOngoingMigrationsAlert(n int, estimate time.Duration) alerts.Alert { + data := make(map[string]interface{}) + if rounded := estimate.Round(time.Minute); rounded > 0 { + data["estimate"] = fmt.Sprintf("~%v remaining", rounded) + } + + return alerts.Alert{ + ID: alertOngoingMigrationsID, + Severity: alerts.SeverityInfo, + Message: fmt.Sprintf("Migrating %d slabs", n), + Timestamp: time.Now(), + Data: data, + } 
+} + +func newRefreshHealthFailedAlert(err error) alerts.Alert { + return alerts.Alert{ + ID: alertHealthRefreshID, + Severity: alerts.SeverityCritical, + Message: "Health refresh failed", + Data: map[string]interface{}{ + "error": err.Error(), + }, + Timestamp: time.Now(), + } +} diff --git a/autopilot/migrator/hosts.go b/autopilot/migrator/hosts.go new file mode 100644 index 000000000..9504c63a2 --- /dev/null +++ b/autopilot/migrator/hosts.go @@ -0,0 +1,369 @@ +package migrator + +import ( + "context" + "fmt" + "io" + "math" + + rhpv2 "go.sia.tech/core/rhp/v2" + rhpv3 "go.sia.tech/core/rhp/v3" + rhpv4 "go.sia.tech/core/rhp/v4" + "go.sia.tech/core/types" + rhp "go.sia.tech/coreutils/rhp/v4" + "go.sia.tech/renterd/api" + "go.sia.tech/renterd/internal/accounts" + "go.sia.tech/renterd/internal/gouging" + "go.sia.tech/renterd/internal/host" + "go.sia.tech/renterd/internal/prices" + rhp3 "go.sia.tech/renterd/internal/rhp/v3" + rhp4 "go.sia.tech/renterd/internal/rhp/v4" + "go.sia.tech/renterd/internal/utils" + "go.uber.org/zap" +) + +type ( + hostClient struct { + hk types.PublicKey + renterKey types.PrivateKey + siamuxAddr string + + acc *accounts.Account + csr ContractSpendingRecorder + pts *prices.PriceTables + rhp3 *rhp3.Client + logger *zap.SugaredLogger + } + + hostDownloadClient struct { + hi api.HostInfo + acc *accounts.Account + pts *prices.PriceTables + rhp3 *rhp3.Client + } + + hostV2DownloadClient struct { + hi api.HostInfo + acc *accounts.Account + pts *prices.PricesCache + rhp4 *rhp4.Client + } + + hostUploadClient struct { + fcid types.FileContractID + hi api.HostInfo + rk types.PrivateKey + + acc *accounts.Account + csr ContractSpendingRecorder + pts *prices.PriceTables + rhp3 *rhp3.Client + } + + hostV2UploadClient struct { + fcid types.FileContractID + hi api.HostInfo + rk types.PrivateKey + + acc *accounts.Account + csr ContractSpendingRecorder + pts *prices.PricesCache + rhp4 *rhp4.Client + } +) + +var ( + _ host.Host = (*hostClient)(nil) + _ host.HostManager = (*migrator)(nil) +) + +func (m *migrator) Host(hk types.PublicKey, fcid types.FileContractID, siamuxAddr string) host.Host { + return &hostClient{ + rhp3: m.rhp3Client, + hk: hk, + acc: m.accounts.ForHost(hk), + csr: m.contractSpendingRecorder, + logger: m.logger.Named(hk.String()[:4]), + siamuxAddr: siamuxAddr, + renterKey: m.masterKey.DeriveContractKey(hk), + pts: m.priceTables, + } +} + +func (m *migrator) Downloader(hi api.HostInfo) host.Downloader { + if hi.IsV2() { + return &hostV2DownloadClient{ + hi: hi, + acc: m.accounts.ForHost(hi.PublicKey), + pts: m.pricesCache, + rhp4: m.rhp4Client, + } + } + return &hostDownloadClient{ + hi: hi, + acc: m.accounts.ForHost(hi.PublicKey), + pts: m.priceTables, + rhp3: m.rhp3Client, + } +} + +func (m *migrator) Uploader(hi api.HostInfo, fcid types.FileContractID) host.Uploader { + if hi.IsV2() { + return &hostV2UploadClient{ + fcid: fcid, + hi: hi, + rk: m.masterKey.DeriveContractKey(hi.PublicKey), + + acc: m.accounts.ForHost(hi.PublicKey), + csr: m.contractSpendingRecorder, + pts: m.pricesCache, + rhp4: m.rhp4Client, + } + } + return &hostUploadClient{ + fcid: fcid, + hi: hi, + rk: m.masterKey.DeriveContractKey(hi.PublicKey), + + acc: m.accounts.ForHost(hi.PublicKey), + csr: m.contractSpendingRecorder, + pts: m.priceTables, + rhp3: m.rhp3Client, + } +} + +func (c *hostClient) PublicKey() types.PublicKey { return c.hk } +func (c *hostDownloadClient) PublicKey() types.PublicKey { return c.hi.PublicKey } +func (c *hostV2DownloadClient) PublicKey() types.PublicKey { return 
c.hi.PublicKey } +func (c *hostUploadClient) PublicKey() types.PublicKey { return c.hi.PublicKey } +func (c *hostV2UploadClient) PublicKey() types.PublicKey { return c.hi.PublicKey } + +func (h *hostClient) PriceTableUnpaid(ctx context.Context) (api.HostPriceTable, error) { + return h.rhp3.PriceTableUnpaid(ctx, h.hk, h.siamuxAddr) +} + +func (h *hostClient) PriceTable(ctx context.Context, rev *types.FileContractRevision) (hpt api.HostPriceTable, cost types.Currency, err error) { + // fetchPT is a helper function that performs the RPC given a payment function + fetchPT := func(paymentFn rhp3.PriceTablePaymentFunc) (api.HostPriceTable, error) { + return h.rhp3.PriceTable(ctx, h.hk, h.siamuxAddr, paymentFn) + } + + // fetch the price table + if rev != nil { + hpt, err = fetchPT(rhp3.PreparePriceTableContractPayment(rev, h.acc.ID(), h.renterKey)) + } else { + hpt, err = fetchPT(rhp3.PreparePriceTableAccountPayment(h.acc.Key())) + } + + // set the cost + if err == nil { + cost = hpt.UpdatePriceTableCost + } + return +} + +// FetchRevision tries to fetch a contract revision from the host. +func (h *hostClient) FetchRevision(ctx context.Context, fcid types.FileContractID) (types.FileContractRevision, error) { + return h.rhp3.Revision(ctx, fcid, h.hk, h.siamuxAddr) +} + +func (h *hostClient) FundAccount(ctx context.Context, desired types.Currency, rev *types.FileContractRevision) error { + log := h.logger.With( + zap.Stringer("host", h.hk), + zap.Stringer("account", h.acc.ID()), + ) + + // ensure we have at least 2H in the contract to cover the costs + if types.NewCurrency64(2).Cmp(rev.ValidRenterPayout()) >= 0 { + return fmt.Errorf("insufficient funds to fund account: %v <= %v", rev.ValidRenterPayout(), types.NewCurrency64(2)) + } + + // calculate the deposit amount + return h.acc.WithDeposit(func(balance types.Currency) (types.Currency, error) { + // return early if we have the desired balance + if balance.Cmp(desired) >= 0 { + return types.ZeroCurrency, nil + } + deposit := desired.Sub(balance) + + // fetch pricetable directly to bypass the gouging check + pt, _, err := h.pts.Fetch(ctx, h, rev) + if err != nil { + return types.ZeroCurrency, err + } + + // cap the deposit by what's left in the contract + cost := types.NewCurrency64(1) + availableFunds := rev.ValidRenterPayout().Sub(cost) + if deposit.Cmp(availableFunds) > 0 { + deposit = availableFunds + } + + // fund the account + if err := h.rhp3.FundAccount(ctx, rev, h.hk, h.siamuxAddr, deposit, h.acc.ID(), pt.HostPriceTable, h.renterKey); err != nil { + if rhp3.IsBalanceMaxExceeded(err) { + h.acc.ScheduleSync() + } + return types.ZeroCurrency, fmt.Errorf("failed to fund account with %v; %w", deposit, err) + } + + // record the spend + h.csr.RecordV1(*rev, api.ContractSpending{FundAccount: deposit.Add(cost)}) + + // log the account balance after funding + log.Debugw("fund account succeeded", + "balance", balance.ExactString(), + "deposit", deposit.ExactString(), + ) + return deposit, nil + }) +} + +func (h *hostClient) SyncAccount(ctx context.Context, rev *types.FileContractRevision) error { + // fetch pricetable directly to bypass the gouging check + pt, _, err := h.pts.Fetch(ctx, h, rev) + if err != nil { + return err + } + + // check only the AccountBalanceCost + if types.NewCurrency64(1).Cmp(pt.AccountBalanceCost) < 0 { + return fmt.Errorf("%w: host is gouging on AccountBalanceCost", gouging.ErrPriceTableGouging) + } + + return h.acc.WithSync(func() (types.Currency, error) { + return h.rhp3.SyncAccount(ctx, rev, h.hk, h.siamuxAddr, 
h.acc.ID(), pt.HostPriceTable, h.renterKey) + }) +} + +func (c *hostDownloadClient) DownloadSector(ctx context.Context, w io.Writer, root types.Hash256, offset, length uint64) (err error) { + return c.acc.WithWithdrawal(func() (types.Currency, error) { + pt, ptc, err := c.pts.Fetch(ctx, c, nil) + if err != nil { + return types.ZeroCurrency, err + } + + cost, err := c.rhp3.ReadSector(ctx, offset, length, root, w, c.hi.PublicKey, c.hi.SiamuxAddr, c.acc.ID(), c.acc.Key(), pt.HostPriceTable) + if err != nil { + return ptc, err + } + return ptc.Add(cost), nil + }) +} + +func (c *hostDownloadClient) PriceTable(ctx context.Context, rev *types.FileContractRevision) (hpt api.HostPriceTable, cost types.Currency, err error) { + hpt, err = c.rhp3.PriceTable(ctx, c.hi.PublicKey, c.hi.SiamuxAddr, rhp3.PreparePriceTableAccountPayment(c.acc.Key())) + if err == nil { + cost = hpt.UpdatePriceTableCost + } + return +} + +func (c *hostV2DownloadClient) DownloadSector(ctx context.Context, w io.Writer, root types.Hash256, offset, length uint64) (err error) { + return c.acc.WithWithdrawal(func() (types.Currency, error) { + prices, err := c.pts.Fetch(ctx, c) + if err != nil { + return types.ZeroCurrency, err + } + + res, err := c.rhp4.ReadSector(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr(), prices, c.acc.Token(), w, root, offset, length) + if err != nil { + return types.ZeroCurrency, err + } + return res.Usage.RenterCost(), nil + }) +} + +func (c *hostV2DownloadClient) Prices(ctx context.Context) (rhpv4.HostPrices, error) { + settings, err := c.rhp4.Settings(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr()) + if err != nil { + return rhpv4.HostPrices{}, err + } + return settings.Prices, nil +} + +func (c *hostUploadClient) PriceTable(ctx context.Context, rev *types.FileContractRevision) (hpt api.HostPriceTable, cost types.Currency, err error) { + hpt, err = c.rhp3.PriceTable(ctx, c.hi.PublicKey, c.hi.SiamuxAddr, rhp3.PreparePriceTableAccountPayment(c.acc.Key())) + if err == nil { + cost = hpt.UpdatePriceTableCost + } + return +} + +func (c *hostUploadClient) UploadSector(ctx context.Context, sectorRoot types.Hash256, sector *[rhpv2.SectorSize]byte) error { + rev, err := c.rhp3.Revision(ctx, c.fcid, c.hi.PublicKey, c.hi.SiamuxAddr) + if err != nil { + return fmt.Errorf("%w; %w", rhp3.ErrFailedToFetchRevision, err) + } else if rev.RevisionNumber == math.MaxUint64 { + return rhp3.ErrMaxRevisionReached + } + + var hpt rhpv3.HostPriceTable + if err := c.acc.WithWithdrawal(func() (amount types.Currency, err error) { + pt, cost, err := c.pts.Fetch(ctx, c, nil) + if err != nil { + return types.ZeroCurrency, err + } + hpt = pt.HostPriceTable + + gc, err := gouging.CheckerFromContext(ctx) + if err != nil { + return cost, err + } + if breakdown := gc.CheckV1(nil, &pt.HostPriceTable); breakdown.Gouging() { + return cost, fmt.Errorf("%w: %v", gouging.ErrPriceTableGouging, breakdown) + } + return cost, nil + }); err != nil { + return err + } + + cost, err := c.rhp3.AppendSector(ctx, sectorRoot, sector, &rev, c.hi.PublicKey, c.hi.SiamuxAddr, c.acc.ID(), hpt, c.rk) + if err != nil { + return fmt.Errorf("failed to upload sector: %w", err) + } + + c.csr.RecordV1(rev, api.ContractSpending{Uploads: cost}) + return nil +} + +func (c *hostV2UploadClient) UploadSector(ctx context.Context, sectorRoot types.Hash256, sector *[rhpv2.SectorSize]byte) error { + fc, err := c.rhp4.LatestRevision(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr(), c.fcid) + if err != nil { + return err + } + + rev := rhp.ContractRevision{ + ID: c.fcid, + Revision: fc, + } + + return 
c.acc.WithWithdrawal(func() (types.Currency, error) { + prices, err := c.pts.Fetch(ctx, c) + if err != nil { + return types.ZeroCurrency, err + } + + res, err := c.rhp4.WriteSector(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr(), prices, c.acc.Token(), utils.NewReaderLen(sector[:]), rhpv2.SectorSize, api.BlocksPerDay*3) + if err != nil { + return types.ZeroCurrency, fmt.Errorf("failed to write sector: %w", err) + } + cost := res.Usage.RenterCost() + + res2, err := c.rhp4.AppendSectors(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr(), prices, c.rk, rev, []types.Hash256{res.Root}) + if err != nil { + return cost, fmt.Errorf("failed to write sector: %w", err) + } + + c.csr.RecordV2(rhp.ContractRevision{ID: rev.ID, Revision: res2.Revision}, api.ContractSpending{Uploads: res2.Usage.RenterCost()}) + return cost, nil + }) +} + +func (c *hostV2UploadClient) Prices(ctx context.Context) (rhpv4.HostPrices, error) { + settings, err := c.rhp4.Settings(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr()) + if err != nil { + return rhpv4.HostPrices{}, err + } + return settings.Prices, nil +} diff --git a/autopilot/migrator/migrator.go b/autopilot/migrator/migrator.go new file mode 100644 index 000000000..42117570f --- /dev/null +++ b/autopilot/migrator/migrator.go @@ -0,0 +1,372 @@ +package migrator + +import ( + "context" + "math" + "net" + "sort" + "sync" + "time" + + rhpv3 "go.sia.tech/core/rhp/v3" + "go.sia.tech/core/types" + "go.sia.tech/renterd/alerts" + "go.sia.tech/renterd/api" + "go.sia.tech/renterd/config" + "go.sia.tech/renterd/internal/accounts" + "go.sia.tech/renterd/internal/download" + "go.sia.tech/renterd/internal/memory" + "go.sia.tech/renterd/internal/prices" + "go.sia.tech/renterd/internal/rhp" + rhp3 "go.sia.tech/renterd/internal/rhp/v3" + rhp4 "go.sia.tech/renterd/internal/rhp/v4" + "go.sia.tech/renterd/internal/upload" + "go.sia.tech/renterd/internal/utils" + "go.sia.tech/renterd/object" + "go.uber.org/zap" +) + +const ( + // migrationAlertRegisterInterval is the interval at which we update the + // ongoing migrations alert to indicate progress + migrationAlertRegisterInterval = 30 * time.Second + + // migratorBatchSize is the amount of slabs we fetch for migration from the + // slab store at once + migratorBatchSize = math.MaxInt // TODO: change once we have a fix for the infinite loop +) + +type ( + Bus interface { + Accounts(context.Context, string) ([]api.Account, error) + AddMultipartPart(ctx context.Context, bucket, key, ETag, uploadID string, partNumber int, slices []object.SlabSlice) (err error) + AddObject(ctx context.Context, bucket, key string, o object.Object, opts api.AddObjectOptions) error + AddPartialSlab(ctx context.Context, data []byte, minShards, totalShards uint8) (slabs []object.SlabSlice, slabBufferMaxSizeSoftReached bool, err error) + AddUploadingSectors(ctx context.Context, uID api.UploadID, root []types.Hash256) error + AcquireContract(ctx context.Context, fcid types.FileContractID, priority int, d time.Duration) (lockID uint64, err error) + ConsensusState(ctx context.Context) (api.ConsensusState, error) + Contracts(ctx context.Context, opts api.ContractsOpts) ([]api.ContractMetadata, error) + DeleteHostSector(ctx context.Context, hk types.PublicKey, root types.Hash256) error + FetchPartialSlab(ctx context.Context, key object.EncryptionKey, offset, length uint32) ([]byte, error) + FinishUpload(ctx context.Context, uID api.UploadID) error + FundAccount(ctx context.Context, account rhpv3.Account, fcid types.FileContractID, amount types.Currency) (types.Currency, error) + 
GougingParams(ctx context.Context) (api.GougingParams, error) + Host(ctx context.Context, hostKey types.PublicKey) (api.Host, error) + KeepaliveContract(ctx context.Context, fcid types.FileContractID, lockID uint64, d time.Duration) (err error) + MarkPackedSlabsUploaded(ctx context.Context, slabs []api.UploadedPackedSlab) error + Objects(ctx context.Context, prefix string, opts api.ListObjectOptions) (resp api.ObjectsResponse, err error) + RecordContractSpending(ctx context.Context, records []api.ContractSpendingRecord) error + ReleaseContract(ctx context.Context, fcid types.FileContractID, lockID uint64) (err error) + RenewedContract(ctx context.Context, renewedFrom types.FileContractID) (api.ContractMetadata, error) + Slab(ctx context.Context, key object.EncryptionKey) (object.Slab, error) + TrackUpload(ctx context.Context, uID api.UploadID) error + UpdateAccounts(context.Context, []api.Account) error + UpdateSlab(ctx context.Context, key object.EncryptionKey, sectors []api.UploadedSector) error + UploadParams(ctx context.Context) (api.UploadParams, error) + UsableHosts(ctx context.Context) (hosts []api.HostInfo, err error) + } + + Migrator interface { + Migrate(ctx context.Context) + SignalMaintenanceFinished() + Status() (bool, time.Time) + Stop() + } + + SlabStore interface { + RefreshHealth(ctx context.Context) error + Slab(ctx context.Context, key object.EncryptionKey) (object.Slab, error) + SlabsForMigration(ctx context.Context, healthCutoff float64, limit int) ([]api.UnhealthySlab, error) + } +) + +type ( + migrator struct { + alerts alerts.Alerter + bus Bus + ss SlabStore + + healthCutoff float64 + parallelSlabsPerWorker uint64 + + masterKey utils.MasterKey + + contractSpendingRecorder *contractSpendingRecorder + + downloadManager *download.Manager + uploadManager *upload.Manager + + rhp3Client *rhp3.Client + rhp4Client *rhp4.Client + + accounts *accounts.Manager + priceTables *prices.PriceTables + pricesCache *prices.PricesCache + + signalConsensusNotSynced chan struct{} + signalMaintenanceFinished chan struct{} + + statsSlabMigrationSpeedMS *utils.DataPoints + + shutdownCtx context.Context + wg sync.WaitGroup + + logger *zap.SugaredLogger + + mu sync.Mutex + migrating bool + migratingLastStart time.Time + } +) + +func New(ctx context.Context, cfg config.Migrator, masterKey utils.MasterKey, alerts alerts.Alerter, ss SlabStore, b Bus, logger *zap.Logger) (*migrator, error) { + logger = logger.Named("migrator") + + dialer := rhp.NewFallbackDialer(b, net.Dialer{}, logger) + m := &migrator{ + alerts: alerts, + bus: b, + ss: ss, + + healthCutoff: cfg.HealthCutoff, + parallelSlabsPerWorker: cfg.ParallelSlabsPerWorker, + + masterKey: masterKey, + + rhp3Client: rhp3.New(dialer, logger), + rhp4Client: rhp4.New(dialer), + + priceTables: prices.NewPriceTables(), + pricesCache: prices.NewPricesCache(), + + signalConsensusNotSynced: make(chan struct{}, 1), + signalMaintenanceFinished: make(chan struct{}, 1), + + statsSlabMigrationSpeedMS: utils.NewDataPoints(time.Hour), + + shutdownCtx: ctx, + + logger: logger.Sugar(), + } + + mgr, err := accounts.NewManager(masterKey.DeriveAccountsKey("migrator"), "migrator", alerts, m, m, b, b, b, b, time.Minute, logger) + if err != nil { + return nil, err + } + m.accounts = mgr + + mm := memory.NewManager(math.MaxInt64, logger) + uk := masterKey.DeriveUploadKey() + m.downloadManager = download.NewManager(ctx, &uk, m, mm, b, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, logger) + m.uploadManager = upload.NewManager(ctx, &uk, m, mm, b, b, b, 
cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, logger) + + m.contractSpendingRecorder = &contractSpendingRecorder{ + bus: b, + logger: logger.Named("spending").Sugar(), + + flushCtx: ctx, + flushInterval: 5 * time.Second, // TODO: can be removed once we've moved it to the bus + + contractSpendings: make(map[types.FileContractID]api.ContractSpendingRecord), + } + + return m, nil +} + +func (m *migrator) Migrate(ctx context.Context) { + m.mu.Lock() + if m.migrating { + m.mu.Unlock() + return + } + m.migrating = true + m.migratingLastStart = time.Now() + m.mu.Unlock() + + m.wg.Add(1) + go func() { + defer m.wg.Done() + m.performMigrations(ctx) + m.mu.Lock() + m.migrating = false + m.mu.Unlock() + }() +} + +func (m *migrator) Stop() { + m.wg.Wait() +} + +func (m *migrator) SignalMaintenanceFinished() { + select { + case m.signalMaintenanceFinished <- struct{}{}: + default: + } +} + +func (m *migrator) Status() (bool, time.Time) { + m.mu.Lock() + defer m.mu.Unlock() + return m.migrating, m.migratingLastStart +} + +func (m *migrator) slabMigrationEstimate(remaining int) time.Duration { + // recompute p90 + m.statsSlabMigrationSpeedMS.Recompute() + + // return 0 if p90 is 0 (can happen if we haven't collected enough data points) + p90 := m.statsSlabMigrationSpeedMS.P90() + if p90 == 0 { + return 0 + } + + totalNumMS := float64(remaining) * p90 / float64(m.parallelSlabsPerWorker) + return time.Duration(totalNumMS) * time.Millisecond +} + +func (m *migrator) performMigrations(ctx context.Context) { + m.logger.Info("performing migrations") + + // prepare jobs channel + jobs := make(chan api.UnhealthySlab) + var wg sync.WaitGroup + defer func() { + close(jobs) + wg.Wait() + }() + + // launch workers + for i := uint64(0); i < m.parallelSlabsPerWorker; i++ { + wg.Add(1) + go func() { + defer wg.Done() + + // process jobs + for j := range jobs { + start := time.Now() + err := m.migrateSlab(ctx, j.EncryptionKey) + m.statsSlabMigrationSpeedMS.Track(float64(time.Since(start).Milliseconds())) + if utils.IsErr(err, api.ErrConsensusNotSynced) { + // interrupt migrations if consensus is not synced + select { + case m.signalConsensusNotSynced <- struct{}{}: + default: + } + return + } else if err != nil { + m.logger.Errorw("migration failed", + zap.Float64("health", j.Health), + zap.Stringer("slab", j.EncryptionKey)) + } + } + }() + } + var toMigrate []api.UnhealthySlab + + // ignore a potential signal before the first iteration of the 'OUTER' loop + select { + case <-m.signalMaintenanceFinished: + default: + } + + // helper to update 'toMigrate' + updateToMigrate := func() { + // fetch slabs for migration + toMigrateNew, err := m.ss.SlabsForMigration(ctx, m.healthCutoff, migratorBatchSize) + if err != nil { + m.logger.Errorf("failed to fetch slabs for migration, err: %v", err) + return + } + m.logger.Infof("%d potential slabs fetched for migration", len(toMigrateNew)) + + // merge toMigrateNew with toMigrate + // NOTE: when merging, we remove all slabs from toMigrate that don't + // require migration anymore. However, slabs that have been in toMigrate + // before will be repaired before any new slabs. This is to prevent + // starvation. 
+ migrateNewMap := make(map[object.EncryptionKey]*api.UnhealthySlab) + for i, slab := range toMigrateNew { + migrateNewMap[slab.EncryptionKey] = &toMigrateNew[i] + } + removed := 0 + for i := 0; i < len(toMigrate)-removed; { + slab := toMigrate[i] + if _, exists := migrateNewMap[slab.EncryptionKey]; exists { + delete(migrateNewMap, slab.EncryptionKey) // delete from map to leave only new slabs + i++ + } else { + toMigrate[i] = toMigrate[len(toMigrate)-1-removed] + removed++ + } + } + toMigrate = toMigrate[:len(toMigrate)-removed] + for _, slab := range migrateNewMap { + toMigrate = append(toMigrate, *slab) + } + + // sort the newly added slabs by health + newSlabs := toMigrate[len(toMigrate)-len(migrateNewMap):] + sort.Slice(newSlabs, func(i, j int) bool { + return newSlabs[i].Health < newSlabs[j].Health + }) + } + + // unregister the ongoing migrations alert when we're done + defer func() { + if err := m.alerts.DismissAlerts(ctx, alertOngoingMigrationsID); err != nil { + m.logger.Errorf("failed to dismiss alert: %v", err) + } + }() + +OUTER: + for { + // recompute health. + start := time.Now() + if err := m.ss.RefreshHealth(ctx); err != nil { + if err := m.alerts.RegisterAlert(ctx, newRefreshHealthFailedAlert(err)); err != nil { + m.logger.Errorf("failed to register alert: %v", err) + } + m.logger.Errorf("failed to recompute cached health before migration: %v", err) + } else { + if err := m.alerts.DismissAlerts(ctx, alertHealthRefreshID); err != nil { + m.logger.Errorf("failed to dismiss alert: %v", err) + } + m.logger.Infof("recomputed slab health in %v", time.Since(start)) + updateToMigrate() + } + + // log the updated list of slabs to migrate + m.logger.Infof("%d slabs to migrate", len(toMigrate)) + + // return if there are no slabs to migrate + if len(toMigrate) == 0 { + return + } + + var lastRegister time.Time + for i, slab := range toMigrate { + if time.Since(lastRegister) > migrationAlertRegisterInterval { + // register an alert to notify users about ongoing migrations + remaining := len(toMigrate) - i + if err := m.alerts.RegisterAlert(ctx, newOngoingMigrationsAlert(remaining, m.slabMigrationEstimate(remaining))); err != nil { + m.logger.Errorf("failed to register alert: %v", err) + } + lastRegister = time.Now() + } + select { + case <-ctx.Done(): + return + case <-m.signalConsensusNotSynced: + m.logger.Info("migrations interrupted - consensus is not synced") + return + case <-m.signalMaintenanceFinished: + m.logger.Info("migrations interrupted - updating slabs for migration") + continue OUTER + case jobs <- slab: + } + } + + // all slabs migrated + return + } +} diff --git a/autopilot/migrator/spending.go b/autopilot/migrator/spending.go new file mode 100644 index 000000000..d4c96cf74 --- /dev/null +++ b/autopilot/migrator/spending.go @@ -0,0 +1,121 @@ +package migrator + +import ( + "context" + "fmt" + "sync" + "time" + + "go.sia.tech/core/types" + rhp "go.sia.tech/coreutils/rhp/v4" + "go.sia.tech/renterd/api" + "go.uber.org/zap" +) + +type ( + ContractSpendingRecorder interface { + RecordV1(types.FileContractRevision, api.ContractSpending) + RecordV2(rhp.ContractRevision, api.ContractSpending) + Stop(context.Context) + } + + contractSpendingRecorder struct { + flushInterval time.Duration + + bus Bus + logger *zap.SugaredLogger + + mu sync.Mutex + contractSpendings map[types.FileContractID]api.ContractSpendingRecord + + flushCtx context.Context + flushTimer *time.Timer + } +) + +var ( + _ ContractSpendingRecorder = (*contractSpendingRecorder)(nil) +) + +// RecordV1 stores the 
given contract spending record until it gets flushed to the bus. +func (r *contractSpendingRecorder) RecordV1(rev types.FileContractRevision, cs api.ContractSpending) { + r.record(rev.ParentID, rev.RevisionNumber, rev.Filesize, rev.ValidRenterPayout(), rev.MissedHostPayout(), cs) +} + +// RecordV2 stores the given contract spending record until it gets flushed to the bus. +func (r *contractSpendingRecorder) RecordV2(rev rhp.ContractRevision, cs api.ContractSpending) { + r.record(rev.ID, rev.Revision.RevisionNumber, rev.Revision.Filesize, rev.Revision.RenterOutput.Value, rev.Revision.HostOutput.Value, cs) +} + +// Stop stops the flush timer and flushes one last time. +func (r *contractSpendingRecorder) Stop(ctx context.Context) { + // stop the flush timer + r.mu.Lock() + if r.flushTimer != nil { + r.flushTimer.Stop() + } + r.flushCtx = ctx + r.mu.Unlock() + + // flush all interactions + r.flush() + + // log if we weren't able to flush them + r.mu.Lock() + if len(r.contractSpendings) > 0 { + r.logger.Errorw(fmt.Sprintf("failed to record %d contract spendings on worker shutdown", len(r.contractSpendings))) + } + r.mu.Unlock() +} + +func (r *contractSpendingRecorder) flush() { + r.mu.Lock() + defer r.mu.Unlock() + + // NOTE: don't bother flushing if the context is cancelled, we can safely + // ignore the buffered records since we'll flush on shutdown and log in case + // we weren't able to flush all spendings o the bus + select { + case <-r.flushCtx.Done(): + r.flushTimer = nil + return + default: + } + + if len(r.contractSpendings) > 0 { + records := make([]api.ContractSpendingRecord, 0, len(r.contractSpendings)) + for _, cs := range r.contractSpendings { + records = append(records, cs) + } + if err := r.bus.RecordContractSpending(r.flushCtx, records); err != nil { + r.logger.Errorw(fmt.Sprintf("failed to record contract spending: %v", err)) + } else { + r.contractSpendings = make(map[types.FileContractID]api.ContractSpendingRecord) + } + } + r.flushTimer = nil +} + +func (r *contractSpendingRecorder) record(fcid types.FileContractID, revisionNumber, size uint64, validRenterPayout, missedHostPayout types.Currency, cs api.ContractSpending) { + r.mu.Lock() + defer r.mu.Unlock() + + // record the spending + csr, found := r.contractSpendings[fcid] + if !found { + csr = api.ContractSpendingRecord{ContractID: fcid} + } + csr.ContractSpending = csr.ContractSpending.Add(cs) + if revisionNumber > csr.RevisionNumber { + csr.RevisionNumber = revisionNumber + csr.Size = size + csr.ValidRenterPayout = validRenterPayout + csr.MissedHostPayout = missedHostPayout + } + r.contractSpendings[fcid] = csr + + // schedule flush + if r.flushTimer == nil { + r.flushTimer = time.AfterFunc(r.flushInterval, r.flush) + } +} diff --git a/worker/migrations.go b/autopilot/migrator/worker.go similarity index 52% rename from worker/migrations.go rename to autopilot/migrator/worker.go index dc42ab6db..872cdd81b 100644 --- a/worker/migrations.go +++ b/autopilot/migrator/worker.go @@ -1,18 +1,96 @@ -package worker +package migrator import ( "context" "fmt" rhpv2 "go.sia.tech/core/rhp/v2" + "go.sia.tech/core/types" + "go.sia.tech/renterd/alerts" "go.sia.tech/renterd/api" + "go.sia.tech/renterd/internal/gouging" "go.sia.tech/renterd/internal/upload" + "go.sia.tech/renterd/internal/utils" "go.sia.tech/renterd/object" "go.uber.org/zap" ) -func (w *Worker) migrate(ctx context.Context, s object.Slab, dlHosts []api.HostInfo, ulHosts []upload.HostInfo, bh uint64) error { +func (m *migrator) migrateSlab(ctx context.Context, key 
object.EncryptionKey) error { + // fetch slab + slab, err := m.ss.Slab(ctx, key) + if err != nil { + return fmt.Errorf("couldn't fetch slab from bus: %w", err) + } + + // fetch the upload parameters + up, err := m.bus.UploadParams(ctx) + if err != nil { + return fmt.Errorf("couldn't fetch upload parameters from bus: %w", err) + } + + // cancel the upload if consensus is not synced + if !up.ConsensusState.Synced { + m.logger.Errorf("migration cancelled, err: %v", api.ErrConsensusNotSynced) + return api.ErrConsensusNotSynced + } + + // attach gouging checker to the context + ctx = gouging.WithChecker(ctx, m.bus, up.GougingParams) + + // fetch hosts + dlHosts, err := m.bus.UsableHosts(ctx) + if err != nil { + return fmt.Errorf("couldn't fetch hosts from bus: %w", err) + } + + hmap := make(map[types.PublicKey]api.HostInfo) + for _, h := range dlHosts { + hmap[h.PublicKey] = h + } + + contracts, err := m.bus.Contracts(ctx, api.ContractsOpts{FilterMode: api.ContractFilterModeGood}) + if err != nil { + return fmt.Errorf("couldn't fetch contracts from bus: %v", err) + } + + var ulHosts []upload.HostInfo + for _, c := range contracts { + if h, ok := hmap[c.HostKey]; ok { + ulHosts = append(ulHosts, upload.HostInfo{ + HostInfo: h, + ContractEndHeight: c.WindowEnd, + ContractID: c.ID, + ContractRenewedFrom: c.RenewedFrom, + }) + } + } + + // migrate the slab and handle alerts + err = m.migrate(ctx, slab, dlHosts, ulHosts, up.CurrentHeight) + if err != nil && !utils.IsErr(err, api.ErrSlabNotFound) { + var objects []api.ObjectMetadata + if res, err := m.bus.Objects(ctx, "", api.ListObjectOptions{SlabEncryptionKey: slab.EncryptionKey}); err != nil { + m.logger.Errorf("failed to list objects for slab key; %v", err) + } else { + objects = res.Objects + } + m.alerts.RegisterAlert(ctx, newMigrationFailedAlert(slab.EncryptionKey, slab.Health, objects, err)) + } else if err == nil { + m.alerts.DismissAlerts(ctx, alerts.IDForSlab(alertMigrationID, slab.EncryptionKey)) + } + + if err != nil { + m.logger.Errorw("failed to migrate slab", + zap.Error(err), + zap.Stringer("slab", slab.EncryptionKey), + ) + return err + } + return nil +} + +func (m *migrator) migrate(ctx context.Context, s object.Slab, dlHosts []api.HostInfo, ulHosts []upload.HostInfo, bh uint64) error { // map usable hosts usableHosts := make(map[types.PublicKey]struct{}) for _, h := range dlHosts { @@ -78,16 +156,16 @@ SHARDS: } // acquire memory for the migration - mem := w.uploadManager.AcquireMemory(ctx, uint64(len(shardIndices))*rhpv2.SectorSize) + mem := m.uploadManager.AcquireMemory(ctx, uint64(len(shardIndices))*rhpv2.SectorSize) if mem == nil { return fmt.Errorf("failed to acquire memory for migration") } defer mem.Release() // download the slab - shards, err := w.downloadManager.DownloadSlab(ctx, s, dlHosts) + shards, err := m.downloadManager.DownloadSlab(ctx, s, dlHosts) if err != nil { - w.logger.Debugw("slab migration failed", + m.logger.Debugw("slab migration failed", zap.Error(err), zap.Stringer("slab", s.EncryptionKey), zap.Int("numShardsMigrated", len(shards)), @@ -112,9 +190,9 @@ SHARDS: } // migrate the shards - err = w.uploadManager.UploadShards(ctx, s, shardIndices, shards, allowed, bh, mem) + err = m.uploadManager.UploadShards(ctx, s, shardIndices, shards, allowed, bh, mem) if err != nil { - w.logger.Debugw("slab migration failed", + m.logger.Debugw("slab migration failed", zap.Error(err), zap.Stringer("slab", s.EncryptionKey), zap.Int("numShardsMigrated", len(shards)), @@ -123,7 +201,7 @@ SHARDS: } // debug log migration 
result - w.logger.Debugw("slab migration succeeded", + m.logger.Debugw("slab migration succeeded", zap.Stringer("slab", s.EncryptionKey), zap.Int("numShardsMigrated", len(shards)), ) diff --git a/autopilot/workerpool.go b/autopilot/workerpool.go deleted file mode 100644 index d3260dd65..000000000 --- a/autopilot/workerpool.go +++ /dev/null @@ -1,48 +0,0 @@ -package autopilot - -import ( - "context" - "sync" - - "go.sia.tech/core/types" - "go.sia.tech/renterd/api" - "go.sia.tech/renterd/object" - "lukechampine.com/frand" -) - -type Worker interface { - Account(ctx context.Context, hostKey types.PublicKey) (api.Account, error) - MigrateSlab(ctx context.Context, s object.Slab) error -} - -// workerPool contains all workers known to the autopilot. Users can call -// withWorker to execute a function with a worker of the pool or withWorkers to -// sequentially run a function on all workers. Due to the RWMutex this will -// never block during normal operations. However, during an update of the -// workerpool, this allows us to guarantee that all workers have finished their -// tasks by calling acquiring an exclusive lock on the pool before updating it. -// That way the caller who updated the pool can rely on the autopilot not using -// a worker that was removed during the update after the update operation -// returns. -type workerPool struct { - mu sync.RWMutex - workers []Worker -} - -func newWorkerPool(workers []Worker) *workerPool { - return &workerPool{ - workers: workers, - } -} - -func (wp *workerPool) withWorker(workerFunc func(Worker)) { - wp.mu.RLock() - defer wp.mu.RUnlock() - workerFunc(wp.workers[frand.Intn(len(wp.workers))]) -} - -func (wp *workerPool) withWorkers(workerFunc func([]Worker)) { - wp.mu.RLock() - defer wp.mu.RUnlock() - workerFunc(wp.workers) -} diff --git a/cmd/renterd/config.go b/cmd/renterd/config.go index ee9252096..fa6c37652 100644 --- a/cmd/renterd/config.go +++ b/cmd/renterd/config.go @@ -40,9 +40,7 @@ var ( disableStdin bool enableANSI = runtime.GOOS != "windows" - hostBasesStr string - workerRemotePassStr string - workerRemoteAddrsStr string + hostBasesStr string ) func defaultConfig() config.Config { @@ -109,14 +107,20 @@ func defaultConfig() config.Config { Autopilot: config.Autopilot{ Enabled: true, - RevisionSubmissionBuffer: 150, // 144 + 6 blocks leeway - Heartbeat: 30 * time.Minute, - MigrationHealthCutoff: 0.75, - RevisionBroadcastInterval: 7 * 24 * time.Hour, - ScannerBatchSize: 100, - ScannerInterval: 4 * time.Hour, - ScannerNumThreads: 10, - MigratorParallelSlabsPerWorker: 1, + RevisionSubmissionBuffer: 150, // 144 + 6 blocks leeway + Heartbeat: 30 * time.Minute, + RevisionBroadcastInterval: 7 * 24 * time.Hour, + ScannerBatchSize: 100, + ScannerInterval: 4 * time.Hour, + ScannerNumThreads: 10, + }, + Migrator: config.Migrator{ + HealthCutoff: 0.75, + ParallelSlabsPerWorker: 1, + DownloadMaxOverdrive: 5, + DownloadOverdriveTimeout: 3 * time.Second, + UploadMaxOverdrive: 5, + UploadOverdriveTimeout: 3 * time.Second, }, S3: config.S3{ Address: "localhost:8080", @@ -172,22 +176,6 @@ func loadConfig() (cfg config.Config, network *consensus.Network, genesis types. 
} func sanitizeConfig(cfg *config.Config) error { - // parse remotes - if workerRemoteAddrsStr != "" && workerRemotePassStr != "" { - cfg.Worker.Remotes = cfg.Worker.Remotes[:0] - for _, addr := range strings.Split(workerRemoteAddrsStr, ";") { - cfg.Worker.Remotes = append(cfg.Worker.Remotes, config.RemoteWorker{ - Address: addr, - Password: workerRemotePassStr, - }) - } - } - - // disable worker if remotes are set - if len(cfg.Worker.Remotes) > 0 { - cfg.Worker.Enabled = false - } - // combine host bucket bases for _, base := range strings.Split(hostBasesStr, ",") { if trimmed := strings.TrimSpace(base); trimmed != "" { @@ -305,15 +293,21 @@ func parseCLIFlags(cfg *config.Config) { // autopilot flag.DurationVar(&cfg.Autopilot.Heartbeat, "autopilot.heartbeat", cfg.Autopilot.Heartbeat, "Interval for autopilot loop execution") - flag.Float64Var(&cfg.Autopilot.MigrationHealthCutoff, "autopilot.migrationHealthCutoff", cfg.Autopilot.MigrationHealthCutoff, "Threshold for migrating slabs based on health") flag.DurationVar(&cfg.Autopilot.RevisionBroadcastInterval, "autopilot.revisionBroadcastInterval", cfg.Autopilot.RevisionBroadcastInterval, "Interval for broadcasting contract revisions (overrides with RENTERD_AUTOPILOT_REVISION_BROADCAST_INTERVAL)") flag.Uint64Var(&cfg.Autopilot.ScannerBatchSize, "autopilot.scannerBatchSize", cfg.Autopilot.ScannerBatchSize, "Batch size for host scanning") flag.DurationVar(&cfg.Autopilot.ScannerInterval, "autopilot.scannerInterval", cfg.Autopilot.ScannerInterval, "Interval for scanning hosts") flag.Uint64Var(&cfg.Autopilot.ScannerNumThreads, "autopilot.scannerNumThreads", cfg.Autopilot.ScannerNumThreads, "Number of threads for scanning hosts") - flag.Uint64Var(&cfg.Autopilot.MigratorParallelSlabsPerWorker, "autopilot.migratorParallelSlabsPerWorker", cfg.Autopilot.MigratorParallelSlabsPerWorker, "Parallel slab migrations per worker (overrides with RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER)") flag.BoolVar(&cfg.Autopilot.Enabled, "autopilot.enabled", cfg.Autopilot.Enabled, "Enables/disables autopilot (overrides with RENTERD_AUTOPILOT_ENABLED)") flag.DurationVar(&cfg.ShutdownTimeout, "node.shutdownTimeout", cfg.ShutdownTimeout, "Timeout for node shutdown") + // migrator + flag.Float64Var(&cfg.Migrator.HealthCutoff, "migrator.healthCutoff", cfg.Migrator.HealthCutoff, "Threshold for migrating slabs based on health") + flag.Uint64Var(&cfg.Migrator.ParallelSlabsPerWorker, "migrator.parallelSlabsPerWorker", cfg.Migrator.ParallelSlabsPerWorker, "Parallel slab migrations per worker (overrides with RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER)") + flag.Uint64Var(&cfg.Migrator.DownloadMaxOverdrive, "migrator.downloadMaxOverdrive", cfg.Migrator.DownloadMaxOverdrive, "Max overdrive workers for migration downloads") + flag.DurationVar(&cfg.Migrator.DownloadOverdriveTimeout, "migrator.downloadOverdriveTimeout", cfg.Migrator.DownloadOverdriveTimeout, "Timeout for overdriving migration downloads") + flag.Uint64Var(&cfg.Migrator.UploadMaxOverdrive, "migrator.uploadMaxOverdrive", cfg.Migrator.UploadMaxOverdrive, "Max overdrive workers for migration uploads") + flag.DurationVar(&cfg.Migrator.UploadOverdriveTimeout, "migrator.uploadOverdriveTimeout", cfg.Migrator.UploadOverdriveTimeout, "Timeout for overdriving migration uploads") + // s3 flag.StringVar(&cfg.S3.Address, "s3.address", cfg.S3.Address, "Address for serving S3 API (overrides with RENTERD_S3_ADDRESS)") flag.BoolVar(&cfg.S3.DisableAuth, "s3.disableAuth", cfg.S3.DisableAuth, "Disables authentication for S3 API (overrides with 
RENTERD_S3_DISABLE_AUTH)") @@ -368,7 +362,8 @@ func parseEnvironmentVariables(cfg *config.Config) { parseEnvVar("RENTERD_AUTOPILOT_ENABLED", &cfg.Autopilot.Enabled) parseEnvVar("RENTERD_AUTOPILOT_REVISION_BROADCAST_INTERVAL", &cfg.Autopilot.RevisionBroadcastInterval) - parseEnvVar("RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER", &cfg.Autopilot.MigratorParallelSlabsPerWorker) + + parseEnvVar("RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER", &cfg.Migrator.ParallelSlabsPerWorker) parseEnvVar("RENTERD_S3_ADDRESS", &cfg.S3.Address) parseEnvVar("RENTERD_S3_ENABLED", &cfg.S3.Enabled) @@ -388,9 +383,6 @@ func parseEnvironmentVariables(cfg *config.Config) { parseEnvVar("RENTERD_LOG_DATABASE_IGNORE_RECORD_NOT_FOUND_ERROR", &cfg.Log.Database.IgnoreRecordNotFoundError) parseEnvVar("RENTERD_LOG_DATABASE_SLOW_THRESHOLD", &cfg.Log.Database.SlowThreshold) - parseEnvVar("RENTERD_WORKER_REMOTE_ADDRS", &workerRemoteAddrsStr) - parseEnvVar("RENTERD_WORKER_API_PASSWORD", &workerRemotePassStr) - parseEnvVar("RENTERD_EXPLORER_DISABLE", &cfg.Explorer.Disable) parseEnvVar("RENTERD_EXPLORER_URL", &cfg.Explorer.URL) } diff --git a/cmd/renterd/node.go b/cmd/renterd/node.go index c5d2526e3..a04072d6a 100644 --- a/cmd/renterd/node.go +++ b/cmd/renterd/node.go @@ -68,9 +68,6 @@ func newNode(cfg config.Config, network *consensus.Network, genesis types.Block) if cfg.Bus.RemoteAddr != "" && !cfg.Worker.Enabled && !cfg.Autopilot.Enabled { return nil, errors.New("remote bus, remote worker, and no autopilot -- nothing to do!") } - if cfg.Autopilot.Enabled && !cfg.Worker.Enabled && len(cfg.Worker.Remotes) == 0 { - return nil, errors.New("can't enable autopilot without providing either workers to connect to or creating a worker") - } // initialise directory err := os.MkdirAll(cfg.Directory, 0700) @@ -156,58 +153,49 @@ func newNode(cfg config.Config, network *consensus.Network, genesis types.Block) // initialise workers var s3Srv *http.Server var s3Listener net.Listener - var workers []autopilot.Worker - if len(cfg.Worker.Remotes) == 0 { - if cfg.Worker.Enabled { - workerKey := blake2b.Sum256(append([]byte("worker"), pk...)) - w, err := worker.New(cfg.Worker, workerKey, bc, logger) + if cfg.Worker.Enabled { + workerKey := blake2b.Sum256(append([]byte("worker"), pk...)) + w, err := worker.New(cfg.Worker, workerKey, bc, logger) + if err != nil { + logger.Fatal("failed to create worker: " + err.Error()) + } + shutdownFns = append(shutdownFns, fn{ + name: "Worker", + fn: w.Shutdown, + }) + + mux.Sub["/api/worker"] = utils.TreeMux{Handler: utils.Auth(cfg.HTTP.Password, cfg.Worker.AllowUnauthenticatedDownloads)(w.Handler())} + + if cfg.S3.Enabled { + s3Handler, err := s3.New(bc, w, logger, s3.Opts{ + AuthDisabled: cfg.S3.DisableAuth, + HostBucketBases: cfg.S3.HostBucketBases, + HostBucketEnabled: cfg.S3.HostBucketEnabled, + }) if err != nil { - logger.Fatal("failed to create worker: " + err.Error()) + err = errors.Join(err, w.Shutdown(context.Background())) + logger.Fatal("failed to create s3 handler: " + err.Error()) } - shutdownFns = append(shutdownFns, fn{ - name: "Worker", - fn: w.Shutdown, - }) - mux.Sub["/api/worker"] = utils.TreeMux{Handler: utils.Auth(cfg.HTTP.Password, cfg.Worker.AllowUnauthenticatedDownloads)(w.Handler())} - wc := worker.NewClient(cfg.HTTP.Address+"/api/worker", cfg.HTTP.Password) - workers = append(workers, wc) - - if cfg.S3.Enabled { - s3Handler, err := s3.New(bc, w, logger, s3.Opts{ - AuthDisabled: cfg.S3.DisableAuth, - HostBucketBases: cfg.S3.HostBucketBases, - HostBucketEnabled: cfg.S3.HostBucketEnabled, - }) 
- if err != nil { - err = errors.Join(err, w.Shutdown(context.Background())) - logger.Fatal("failed to create s3 handler: " + err.Error()) - } - - s3Srv = &http.Server{ - Addr: cfg.S3.Address, - Handler: s3Handler, - } - s3Listener, err = utils.ListenTCP(cfg.S3.Address, logger) - if err != nil { - logger.Fatal("failed to create listener: " + err.Error()) - } - shutdownFns = append(shutdownFns, fn{ - name: "S3", - fn: s3Srv.Shutdown, - }) + s3Srv = &http.Server{ + Addr: cfg.S3.Address, + Handler: s3Handler, } - } - } else { - for _, remote := range cfg.Worker.Remotes { - workers = append(workers, worker.NewClient(remote.Address, remote.Password)) - logger.Info("connecting to remote worker at " + remote.Address) + s3Listener, err = utils.ListenTCP(cfg.S3.Address, logger) + if err != nil { + logger.Fatal("failed to create listener: " + err.Error()) + } + shutdownFns = append(shutdownFns, fn{ + name: "S3", + fn: s3Srv.Shutdown, + }) } } // initialise autopilot if cfg.Autopilot.Enabled { - ap, err := autopilot.New(cfg.Autopilot, bc, workers, logger) + workerKey := blake2b.Sum256(append([]byte("worker"), pk...)) + ap, err := autopilot.New(cfg.Autopilot, cfg.Migrator, workerKey, bc, logger) if err != nil { logger.Fatal("failed to create autopilot: " + err.Error()) } diff --git a/config/config.go b/config/config.go index 085503b49..40b77ce09 100644 --- a/config/config.go +++ b/config/config.go @@ -24,6 +24,7 @@ type ( Worker Worker `yaml:"worker,omitempty"` S3 S3 `yaml:"s3,omitempty"` + Migrator Migrator `yaml:"migrator,omitempty"` Database Database `yaml:"database,omitempty"` Explorer ExplorerData `yaml:"explorer,omitempty"` } @@ -103,11 +104,6 @@ type ( MetricsDatabase string `yaml:"metricsDatabase,omitempty"` } - RemoteWorker struct { - Address string `yaml:"address,omitempty"` - Password string `yaml:"password,omitempty"` - } - S3 struct { Address string `yaml:"address,omitempty"` DisableAuth bool `yaml:"disableAuth,omitempty"` @@ -118,32 +114,39 @@ type ( // Worker contains the configuration for a worker. 
Worker struct { - Enabled bool `yaml:"enabled,omitempty"` - ID string `yaml:"id,omitempty"` - Remotes []RemoteWorker `yaml:"remotes,omitempty"` - AccountsRefillInterval time.Duration `yaml:"accountsRefillInterval,omitempty"` - BusFlushInterval time.Duration `yaml:"busFlushInterval,omitempty"` - DownloadOverdriveTimeout time.Duration `yaml:"downloadOverdriveTimeout,omitempty"` - UploadOverdriveTimeout time.Duration `yaml:"uploadOverdriveTimeout,omitempty"` - DownloadMaxOverdrive uint64 `yaml:"downloadMaxOverdrive,omitempty"` - DownloadMaxMemory uint64 `yaml:"downloadMaxMemory,omitempty"` - UploadMaxMemory uint64 `yaml:"uploadMaxMemory,omitempty"` - UploadMaxOverdrive uint64 `yaml:"uploadMaxOverdrive,omitempty"` - AllowUnauthenticatedDownloads bool `yaml:"allowUnauthenticatedDownloads,omitempty"` + Enabled bool `yaml:"enabled,omitempty"` + ID string `yaml:"id,omitempty"` + AccountsRefillInterval time.Duration `yaml:"accountsRefillInterval,omitempty"` + BusFlushInterval time.Duration `yaml:"busFlushInterval,omitempty"` + DownloadOverdriveTimeout time.Duration `yaml:"downloadOverdriveTimeout,omitempty"` + UploadOverdriveTimeout time.Duration `yaml:"uploadOverdriveTimeout,omitempty"` + DownloadMaxOverdrive uint64 `yaml:"downloadMaxOverdrive,omitempty"` + DownloadMaxMemory uint64 `yaml:"downloadMaxMemory,omitempty"` + UploadMaxMemory uint64 `yaml:"uploadMaxMemory,omitempty"` + UploadMaxOverdrive uint64 `yaml:"uploadMaxOverdrive,omitempty"` + AllowUnauthenticatedDownloads bool `yaml:"allowUnauthenticatedDownloads,omitempty"` } // Autopilot contains the configuration for an autopilot. Autopilot struct { - Enabled bool `yaml:"enabled,omitempty"` - AllowRedundantHostIPs bool `yaml:"allowRedundantHostIPs,omitempty"` - Heartbeat time.Duration `yaml:"heartbeat,omitempty"` - MigrationHealthCutoff float64 `yaml:"migrationHealthCutoff,omitempty"` - RevisionBroadcastInterval time.Duration `yaml:"revisionBroadcastInterval,omitempty"` - RevisionSubmissionBuffer uint64 `yaml:"revisionSubmissionBuffer,omitempty"` - ScannerInterval time.Duration `yaml:"scannerInterval,omitempty"` - ScannerBatchSize uint64 `yaml:"scannerBatchSize,omitempty"` - ScannerNumThreads uint64 `yaml:"scannerNumThreads,omitempty"` - MigratorParallelSlabsPerWorker uint64 `yaml:"migratorParallelSlabsPerWorker,omitempty"` + Enabled bool `yaml:"enabled,omitempty"` + AllowRedundantHostIPs bool `yaml:"allowRedundantHostIPs,omitempty"` + Heartbeat time.Duration `yaml:"heartbeat,omitempty"` + RevisionBroadcastInterval time.Duration `yaml:"revisionBroadcastInterval,omitempty"` + RevisionSubmissionBuffer uint64 `yaml:"revisionSubmissionBuffer,omitempty"` + ScannerInterval time.Duration `yaml:"scannerInterval,omitempty"` + ScannerBatchSize uint64 `yaml:"scannerBatchSize,omitempty"` + ScannerNumThreads uint64 `yaml:"scannerNumThreads,omitempty"` + } + + // Migrator contains the configuration for a migrator. 
+ Migrator struct { + DownloadMaxOverdrive uint64 `yaml:"downloadMaxOverdrive,omitempty"` + DownloadOverdriveTimeout time.Duration `yaml:"downloadOverdriveTimeout,omitempty"` + HealthCutoff float64 `yaml:"healthCutoff,omitempty"` + ParallelSlabsPerWorker uint64 `yaml:"parallelSlabsPerWorker,omitempty"` + UploadMaxOverdrive uint64 `yaml:"uploadMaxOverdrive,omitempty"` + UploadOverdriveTimeout time.Duration `yaml:"uploadOverdriveTimeout,omitempty"` } ) diff --git a/internal/worker/accounts.go b/internal/accounts/accounts.go similarity index 92% rename from internal/worker/accounts.go rename to internal/accounts/accounts.go index 421eef593..e1a7b33b1 100644 --- a/internal/worker/accounts.go +++ b/internal/accounts/accounts.go @@ -1,4 +1,4 @@ -package worker +package accounts import ( "context" @@ -42,15 +42,15 @@ var ( ) type ( - AccountFunder interface { + Funder interface { FundAccount(ctx context.Context, fcid types.FileContractID, hk types.PublicKey, desired types.Currency) error } - AccountSyncer interface { + Syncer interface { SyncAccount(ctx context.Context, fcid types.FileContractID, host api.HostInfo) error } - AccountStore interface { + Store interface { Accounts(context.Context, string) ([]api.Account, error) UpdateAccounts(context.Context, []api.Account) error } @@ -69,14 +69,14 @@ type ( ) type ( - AccountMgr struct { + Manager struct { alerts alerts.Alerter - funder AccountFunder - syncer AccountSyncer + funder Funder + syncer Syncer cs ContractStore hs HostStore css ConsensusStateStore - s AccountStore + s Store key utils.AccountsKey logger *zap.SugaredLogger owner string @@ -104,14 +104,14 @@ type ( } ) -// NewAccountManager creates a new account manager. It will load all accounts -// from the given store and mark the shutdown as unclean. When Shutdown is -// called it will save all accounts. -func NewAccountManager(key utils.AccountsKey, owner string, alerter alerts.Alerter, funder AccountFunder, syncer AccountSyncer, css ConsensusStateStore, cs ContractStore, hs HostStore, s AccountStore, refillInterval time.Duration, l *zap.Logger) (*AccountMgr, error) { +// NewManager creates a new account manager. It will load all accounts from the +// given store and mark the shutdown as unclean. When Shutdown is called it will +// save all accounts. +func NewManager(key utils.AccountsKey, owner string, alerter alerts.Alerter, funder Funder, syncer Syncer, css ConsensusStateStore, cs ContractStore, hs HostStore, s Store, refillInterval time.Duration, l *zap.Logger) (*Manager, error) { logger := l.Named("accounts").Sugar() shutdownCtx, shutdownCancel := context.WithCancel(context.Background()) - a := &AccountMgr{ + a := &Manager{ alerts: alerter, funder: funder, syncer: syncer, @@ -140,13 +140,13 @@ func NewAccountManager(key utils.AccountsKey, owner string, alerter alerts.Alert } // Account returns the account with the given id. -func (a *AccountMgr) Account(hostKey types.PublicKey) api.Account { +func (a *Manager) Account(hostKey types.PublicKey) api.Account { acc := a.account(hostKey) return acc.convert() } // Accounts returns all accounts. -func (a *AccountMgr) Accounts() []api.Account { +func (a *Manager) Accounts() []api.Account { a.mu.Lock() defer a.mu.Unlock() accounts := make([]api.Account, 0, len(a.byID)) @@ -157,7 +157,7 @@ func (a *AccountMgr) Accounts() []api.Account { } // ResetDrift resets the drift on an account. 
-func (a *AccountMgr) ResetDrift(id rhpv3.Account) error { +func (a *Manager) ResetDrift(id rhpv3.Account) error { a.mu.Lock() account, exists := a.byID[id] if !exists { @@ -170,7 +170,7 @@ func (a *AccountMgr) ResetDrift(id rhpv3.Account) error { return nil } -func (a *AccountMgr) Shutdown(ctx context.Context) error { +func (a *Manager) Shutdown(ctx context.Context) error { accounts := a.Accounts() err := a.s.UpdateAccounts(ctx, accounts) if err != nil { @@ -194,7 +194,7 @@ func (a *AccountMgr) Shutdown(ctx context.Context) error { return nil } -func (a *AccountMgr) account(hk types.PublicKey) *Account { +func (a *Manager) account(hk types.PublicKey) *Account { a.mu.Lock() defer a.mu.Unlock() @@ -226,11 +226,11 @@ func (a *AccountMgr) account(hk types.PublicKey) *Account { // ForHost returns an account to use for a given host. If the account // doesn't exist, a new one is created. -func (a *AccountMgr) ForHost(hk types.PublicKey) *Account { +func (a *Manager) ForHost(hk types.PublicKey) *Account { return a.account(hk) } -func (a *AccountMgr) run() { +func (a *Manager) run() { // wait for store to become available var saved []api.Account var err error @@ -300,7 +300,7 @@ func (a *AccountMgr) run() { } } -func (a *AccountMgr) markRefillInProgress(hk types.PublicKey) bool { +func (a *Manager) markRefillInProgress(hk types.PublicKey) bool { a.mu.Lock() defer a.mu.Unlock() _, inProgress := a.inProgressRefills[hk] @@ -311,7 +311,7 @@ func (a *AccountMgr) markRefillInProgress(hk types.PublicKey) bool { return true } -func (a *AccountMgr) markRefillDone(hk types.PublicKey) { +func (a *Manager) markRefillDone(hk types.PublicKey) { a.mu.Lock() defer a.mu.Unlock() _, inProgress := a.inProgressRefills[hk] @@ -326,7 +326,7 @@ func (a *AccountMgr) markRefillDone(hk types.PublicKey) { // is used for every host. If a slow host's account is still being refilled by a // goroutine from a previous call, refillWorkerAccounts will skip that account // until the previously launched goroutine returns. 
-func (a *AccountMgr) refillAccounts() { +func (a *Manager) refillAccounts() { // fetch all contracts contracts, err := a.cs.Contracts(a.shutdownCtx, api.ContractsOpts{}) if err != nil { @@ -386,7 +386,7 @@ func (a *AccountMgr) refillAccounts() { } } -func (a *AccountMgr) refillAccount(ctx context.Context, contract api.ContractMetadata, host api.HostInfo) (bool, error) { +func (a *Manager) refillAccount(ctx context.Context, contract api.ContractMetadata, host api.HostInfo) (bool, error) { // fetch the account account := a.Account(contract.HostKey) diff --git a/internal/worker/accounts_test.go b/internal/accounts/accounts_test.go similarity index 95% rename from internal/worker/accounts_test.go rename to internal/accounts/accounts_test.go index e46223dcd..36a70f5dd 100644 --- a/internal/worker/accounts_test.go +++ b/internal/accounts/accounts_test.go @@ -1,4 +1,4 @@ -package worker +package accounts import ( "context" @@ -64,7 +64,7 @@ func TestAccounts(t *testing.T) { }, }, } - mgr, err := NewAccountManager(utils.AccountsKey(types.GeneratePrivateKey()), "test", b, b, b, b, b, b, b, time.Second, zap.NewNop()) + mgr, err := NewManager(utils.AccountsKey(types.GeneratePrivateKey()), "test", b, b, b, b, b, b, b, time.Second, zap.NewNop()) if err != nil { t.Fatal(err) } @@ -178,7 +178,7 @@ func TestResetAccountDriftRate(t *testing.T) { hi := api.HostInfo{ PublicKey: hk, } - mgr, err := NewAccountManager(utils.AccountsKey(types.GeneratePrivateKey()), "test", b, b, b, b, b, b, b, time.Second, zap.NewNop()) + mgr, err := NewManager(utils.AccountsKey(types.GeneratePrivateKey()), "test", b, b, b, b, b, b, b, time.Second, zap.NewNop()) if err != nil { t.Fatal(err) } diff --git a/worker/gouging.go b/internal/gouging/context.go similarity index 51% rename from worker/gouging.go rename to internal/gouging/context.go index 75ac0bbf8..c4ec02c9f 100644 --- a/worker/gouging.go +++ b/internal/gouging/context.go @@ -1,11 +1,10 @@ -package worker +package gouging import ( "context" "fmt" "go.sia.tech/renterd/api" - "go.sia.tech/renterd/internal/gouging" ) const ( @@ -14,16 +13,16 @@ const ( type contextKey string -func GougingCheckerFromContext(ctx context.Context) (gouging.Checker, error) { - gc, ok := ctx.Value(keyGougingChecker).(func() (gouging.Checker, error)) +func CheckerFromContext(ctx context.Context) (Checker, error) { + gc, ok := ctx.Value(keyGougingChecker).(func() (Checker, error)) if !ok { panic("no gouging checker attached to the context") // developer error } return gc() } -func WithGougingChecker(ctx context.Context, cs gouging.ConsensusState, gp api.GougingParams) context.Context { - return context.WithValue(ctx, keyGougingChecker, func() (gouging.Checker, error) { +func WithChecker(ctx context.Context, cs ConsensusState, gp api.GougingParams) context.Context { + return context.WithValue(ctx, keyGougingChecker, func() (Checker, error) { cs, err := cs.ConsensusState(ctx) if err != nil { return nil, fmt.Errorf("failed to get consensus state: %w", err) @@ -32,6 +31,6 @@ func WithGougingChecker(ctx context.Context, cs gouging.ConsensusState, gp api.G }) } -func newGougingChecker(settings api.GougingSettings, cs api.ConsensusState) gouging.Checker { - return gouging.NewChecker(settings, cs) +func newGougingChecker(settings api.GougingSettings, cs api.ConsensusState) Checker { + return NewChecker(settings, cs) } diff --git a/internal/test/e2e/cluster.go b/internal/test/e2e/cluster.go index dcd210c83..8b947c0a8 100644 --- a/internal/test/e2e/cluster.go +++ b/internal/test/e2e/cluster.go @@ -237,7 
+237,7 @@ func newTestCluster(t *testing.T, opts testClusterOptions) *TestCluster { wk = *opts.walletKey } - busCfg, workerCfg, apCfg, dbCfg := testBusCfg(), testWorkerCfg(), testApCfg(), testDBCfg() + busCfg, workerCfg, apCfg, mCfg, dbCfg := testBusCfg(), testWorkerCfg(), testApCfg(), testMigratorCfg(), testDBCfg() if opts.busCfg != nil { busCfg = *opts.busCfg } @@ -373,7 +373,7 @@ func newTestCluster(t *testing.T, opts testClusterOptions) *TestCluster { s3ShutdownFns = append(s3ShutdownFns, s3Server.Shutdown) // Create autopilot. - ap, err := autopilot.New(apCfg, busClient, []autopilot.Worker{workerClient}, logger) + ap, err := autopilot.New(apCfg, mCfg, workerKey, busClient, logger) tt.OK(err) autopilotAuth := jape.BasicAuth(autopilotPassword) @@ -996,20 +996,30 @@ func testWorkerCfg() config.Worker { UploadOverdriveTimeout: 500 * time.Millisecond, DownloadMaxMemory: 1 << 28, // 256 MiB UploadMaxMemory: 1 << 28, // 256 MiB + DownloadMaxOverdrive: 5, // TODO: added b/c I think this was overlooked but not sure UploadMaxOverdrive: 5, } } func testApCfg() config.Autopilot { return config.Autopilot{ - AllowRedundantHostIPs: true, - Heartbeat: time.Second, - MigrationHealthCutoff: 0.99, - MigratorParallelSlabsPerWorker: 1, - RevisionSubmissionBuffer: 0, - ScannerInterval: 10 * time.Millisecond, - ScannerBatchSize: 10, - ScannerNumThreads: 1, + AllowRedundantHostIPs: true, + Heartbeat: time.Second, + RevisionSubmissionBuffer: 0, + ScannerInterval: 10 * time.Millisecond, + ScannerBatchSize: 10, + ScannerNumThreads: 1, + } +} + +func testMigratorCfg() config.Migrator { + return config.Migrator{ + HealthCutoff: 0.99, + ParallelSlabsPerWorker: 1, + DownloadMaxOverdrive: 5, + DownloadOverdriveTimeout: 500 * time.Millisecond, + UploadOverdriveTimeout: 500 * time.Millisecond, + UploadMaxOverdrive: 5, } } diff --git a/worker/reader.go b/internal/utils/reader.go similarity index 96% rename from worker/reader.go rename to internal/utils/reader.go index f345a2629..498aa81e0 100644 --- a/worker/reader.go +++ b/internal/utils/reader.go @@ -1,4 +1,4 @@ -package worker +package utils import ( "bytes" diff --git a/stores/sql/types.go b/stores/sql/types.go index 0cf08ac7f..6268d8ea9 100644 --- a/stores/sql/types.go +++ b/stores/sql/types.go @@ -299,7 +299,7 @@ func (k *EncryptionKey) Scan(value interface{}) error { case string: bytes = []byte(v) default: - return errors.New(fmt.Sprintf("failed to unmarshal EncryptionKey value from %t", value)) + return fmt.Errorf("failed to unmarshal EncryptionKey value from %t", value) } var ec object.EncryptionKey if err := ec.UnmarshalBinary(bytes); err != nil { diff --git a/webhooks/webhooks.go b/webhooks/webhooks.go index ce643835c..411e6b2bb 100644 --- a/webhooks/webhooks.go +++ b/webhooks/webhooks.go @@ -161,9 +161,6 @@ func (m *Manager) Info() ([]Webhook, []WebhookQueueInfo) { } func (m *Manager) Register(ctx context.Context, wh Webhook) error { - ctx, cancel := context.WithTimeout(m.shutdownCtx, webhookTimeout) - defer cancel() - // Test URL. 
err := sendEvent(ctx, wh.URL, wh.Headers, Event{ Event: WebhookEventPing, diff --git a/worker/alerts.go b/worker/alerts.go index 28b64f6bc..435807897 100644 --- a/worker/alerts.go +++ b/worker/alerts.go @@ -6,16 +6,10 @@ import ( "go.sia.tech/core/types" "go.sia.tech/renterd/alerts" - "go.sia.tech/renterd/api" "go.sia.tech/renterd/internal/utils" - "go.sia.tech/renterd/object" "lukechampine.com/frand" ) -var ( - alertMigrationID = alerts.RandomAlertID() // constant until restarted -) - func randomAlertID() types.Hash256 { return frand.Entropy256() } @@ -37,46 +31,6 @@ func newDownloadFailedAlert(bucket, key string, offset, length, contracts int64, } } -func newMigrationFailedAlert(slabKey object.EncryptionKey, health float64, objects []api.ObjectMetadata, err error) alerts.Alert { - data := map[string]interface{}{ - "error": err.Error(), - "health": health, - "slabKey": slabKey.String(), - "hint": "Migration failures can be temporary, but if they persist it can eventually lead to data loss and should therefor be taken very seriously.", - } - - if len(objects) > 0 { - data["objects"] = objects - } - - hostErr := err - for errors.Unwrap(hostErr) != nil { - hostErr = errors.Unwrap(hostErr) - } - if set, ok := hostErr.(utils.HostErrorSet); ok { - hostErrors := make(map[string]string, len(set)) - for hk, err := range set { - hostErrors[hk.String()] = err.Error() - } - data["hosts"] = hostErrors - } - - severity := alerts.SeverityError - if health < 0.25 { - severity = alerts.SeverityCritical - } else if health < 0.5 { - severity = alerts.SeverityWarning - } - - return alerts.Alert{ - ID: alerts.IDForSlab(alertMigrationID, slabKey), - Severity: severity, - Message: "Slab migration failed", - Data: data, - Timestamp: time.Now(), - } -} - func newUploadFailedAlert(bucket, path, mimeType string, minShards, totalShards, contracts int, packing, multipart bool, err error) alerts.Alert { data := map[string]any{ "bucket": bucket, diff --git a/worker/client/client.go b/worker/client/client.go index af147a1da..d507965e9 100644 --- a/worker/client/client.go +++ b/worker/client/client.go @@ -39,7 +39,7 @@ func (c *Client) Account(ctx context.Context, hostKey types.PublicKey) (account // Accounts returns all accounts. 
func (c *Client) Accounts(ctx context.Context) (accounts []api.Account, err error) { - err = c.c.WithContext(ctx).GET(fmt.Sprintf("/accounts"), &accounts) + err = c.c.WithContext(ctx).GET("/accounts", &accounts) return } diff --git a/worker/host.go b/worker/host.go index 4c6008760..4064f4083 100644 --- a/worker/host.go +++ b/worker/host.go @@ -12,12 +12,13 @@ import ( "go.sia.tech/core/types" rhp "go.sia.tech/coreutils/rhp/v4" "go.sia.tech/renterd/api" + "go.sia.tech/renterd/internal/accounts" "go.sia.tech/renterd/internal/gouging" "go.sia.tech/renterd/internal/host" "go.sia.tech/renterd/internal/prices" rhp3 "go.sia.tech/renterd/internal/rhp/v3" rhp4 "go.sia.tech/renterd/internal/rhp/v4" - "go.sia.tech/renterd/internal/worker" + "go.sia.tech/renterd/internal/utils" "go.uber.org/zap" ) @@ -27,7 +28,7 @@ type ( renterKey types.PrivateKey siamuxAddr string - acc *worker.Account + acc *accounts.Account csr ContractSpendingRecorder pts *prices.PriceTables rhp3 *rhp3.Client @@ -36,14 +37,14 @@ type ( hostDownloadClient struct { hi api.HostInfo - acc *worker.Account + acc *accounts.Account pts *prices.PriceTables rhp3 *rhp3.Client } hostV2DownloadClient struct { hi api.HostInfo - acc *worker.Account + acc *accounts.Account pts *prices.PricesCache rhp4 *rhp4.Client } @@ -53,7 +54,7 @@ type ( hi api.HostInfo rk types.PrivateKey - acc *worker.Account + acc *accounts.Account csr ContractSpendingRecorder pts *prices.PriceTables rhp3 *rhp3.Client @@ -64,7 +65,7 @@ type ( hi api.HostInfo rk types.PrivateKey - acc *worker.Account + acc *accounts.Account csr ContractSpendingRecorder pts *prices.PricesCache rhp4 *rhp4.Client @@ -305,7 +306,7 @@ func (c *hostUploadClient) UploadSector(ctx context.Context, sectorRoot types.Ha } hpt = pt.HostPriceTable - gc, err := GougingCheckerFromContext(ctx) + gc, err := gouging.CheckerFromContext(ctx) if err != nil { return cost, err } @@ -343,7 +344,7 @@ func (c *hostV2UploadClient) UploadSector(ctx context.Context, sectorRoot types. 
return types.ZeroCurrency, err } - res, err := c.rhp4.WriteSector(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr(), prices, c.acc.Token(), NewReaderLen(sector[:]), rhpv2.SectorSize, api.BlocksPerDay*3) + res, err := c.rhp4.WriteSector(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr(), prices, c.acc.Token(), utils.NewReaderLen(sector[:]), rhpv2.SectorSize, api.BlocksPerDay*3) if err != nil { return types.ZeroCurrency, fmt.Errorf("failed to write sector: %w", err) } diff --git a/worker/upload.go b/worker/upload.go index 91b5f4d4b..71e3944df 100644 --- a/worker/upload.go +++ b/worker/upload.go @@ -11,6 +11,7 @@ import ( "go.sia.tech/core/types" "go.sia.tech/renterd/api" + "go.sia.tech/renterd/internal/gouging" "go.sia.tech/renterd/internal/memory" "go.sia.tech/renterd/internal/upload" "go.uber.org/zap" @@ -187,7 +188,7 @@ func (w *Worker) uploadPackedSlab(ctx context.Context, mem memory.Memory, ps api } // attach gouging checker to the context - ctx = WithGougingChecker(ctx, w.bus, up.GougingParams) + ctx = gouging.WithChecker(ctx, w.bus, up.GougingParams) // upload packed slab err = w.uploadManager.UploadPackedSlab(ctx, rs, ps, mem, contracts, up.CurrentHeight) diff --git a/worker/worker.go b/worker/worker.go index f73ed59cf..7ac7702b4 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -24,6 +24,7 @@ import ( "go.sia.tech/renterd/api" "go.sia.tech/renterd/build" "go.sia.tech/renterd/config" + "go.sia.tech/renterd/internal/accounts" "go.sia.tech/renterd/internal/download" "go.sia.tech/renterd/internal/gouging" "go.sia.tech/renterd/internal/memory" @@ -70,7 +71,7 @@ type ( webhooks.Broadcaster AccountFunder - iworker.AccountStore + accounts.Store ContractLocker ContractStore @@ -176,7 +177,7 @@ type Worker struct { downloadManager *download.Manager uploadManager *upload.Manager - accounts *iworker.AccountMgr + accounts *accounts.Manager dialer *rhp.FallbackDialer cache iworker.WorkerCache priceTables *prices.PriceTables @@ -226,60 +227,6 @@ func (w *Worker) registerAlert(a alerts.Alert) { cancel() } -func (w *Worker) slabMigrateHandler(jc jape.Context) { - ctx := jc.Request.Context() - - // decode the slab - var slab object.Slab - if jc.Decode(&slab) != nil { - return - } - - // fetch the upload parameters - up, err := w.bus.UploadParams(ctx) - if jc.Check("couldn't fetch upload parameters from bus", err) != nil { - return - } - - // cancel the upload if consensus is not synced - if !up.ConsensusState.Synced { - w.logger.Errorf("migration cancelled, err: %v", api.ErrConsensusNotSynced) - jc.Error(api.ErrConsensusNotSynced, http.StatusServiceUnavailable) - return - } - - // attach gouging checker to the context - ctx = WithGougingChecker(ctx, w.bus, up.GougingParams) - - // fetch hosts - dlHosts, err := w.cache.UsableHosts(ctx) - if jc.Check("couldn't fetch hosts from bus", err) != nil { - return - } - - // fetch host & contract info - ulContracts, err := w.hostContracts(ctx) - if jc.Check("couldn't fetch contracts from bus", err) != nil { - return - } - - // migrate the slab and handle alerts - err = w.migrate(ctx, slab, dlHosts, ulContracts, up.CurrentHeight) - if err != nil && !utils.IsErr(err, api.ErrSlabNotFound) { - var objects []api.ObjectMetadata - if res, err := w.bus.Objects(ctx, "", api.ListObjectOptions{SlabEncryptionKey: slab.EncryptionKey}); err != nil { - w.logger.Errorf("failed to list objects for slab key; %v", err) - } else { - objects = res.Objects - } - w.alerts.RegisterAlert(ctx, newMigrationFailedAlert(slab.EncryptionKey, slab.Health, objects, err)) - } else if err == nil { - 
w.alerts.DismissAlerts(jc.Request.Context(), alerts.IDForSlab(alertMigrationID, slab.EncryptionKey)) - } - - jc.Check("failed to migrate slab", err) -} - func (w *Worker) downloadsStatsHandlerGET(jc jape.Context) { stats := w.downloadManager.Stats() @@ -624,7 +571,7 @@ func (w *Worker) accountsResetDriftHandlerPOST(jc jape.Context) { return } err := w.accounts.ResetDrift(id) - if errors.Is(err, iworker.ErrAccountNotFound) { + if errors.Is(err, accounts.ErrAccountNotFound) { jc.Error(err, http.StatusNotFound) return } @@ -726,8 +673,6 @@ func (w *Worker) Handler() http.Handler { "DELETE /object/*key": w.objectHandlerDELETE, "POST /objects/remove": w.objectsRemoveHandlerPOST, - "POST /slab/migrate": w.slabMigrateHandler, - "GET /state": w.stateHandlerGET, "GET /stats/downloads": w.downloadsStatsHandlerGET, @@ -793,11 +738,9 @@ func (w *Worker) FundAccount(ctx context.Context, fcid types.FileContractID, hk if balance.Cmp(desired) >= 0 { return types.ZeroCurrency, nil } - deposit := desired.Sub(balance) // fund the account - var err error - deposit, err = w.bus.FundAccount(ctx, acc.ID(), fcid, desired.Sub(balance)) + deposit, err := w.bus.FundAccount(ctx, acc.ID(), fcid, desired.Sub(balance)) if err != nil { if rhp3.IsBalanceMaxExceeded(err) { acc.ScheduleSync() @@ -852,7 +795,7 @@ func (w *Worker) GetObject(ctx context.Context, bucket, key string, opts api.Dow } else { // otherwise return a pipe reader downloadFn := func(wr io.Writer, offset, length int64) error { - ctx = WithGougingChecker(ctx, w.bus, gp) + ctx = gouging.WithChecker(ctx, w.bus, gp) err = w.downloadManager.DownloadObject(ctx, wr, obj, uint64(offset), uint64(length), hosts) if err != nil { w.logger.Error(err) @@ -898,7 +841,7 @@ func (w *Worker) SyncAccount(ctx context.Context, fcid types.FileContractID, hos if err != nil { return fmt.Errorf("couldn't get gouging parameters; %w", err) } - ctx = WithGougingChecker(ctx, w.bus, gp) + ctx = gouging.WithChecker(ctx, w.bus, gp) // sync the account h := w.Host(host.PublicKey, fcid, host.SiamuxAddr) @@ -919,7 +862,7 @@ func (w *Worker) UploadObject(ctx context.Context, r io.Reader, bucket, key stri } // attach gouging checker to the context - ctx = WithGougingChecker(ctx, w.bus, up.GougingParams) + ctx = gouging.WithChecker(ctx, w.bus, up.GougingParams) // fetch host & contract info contracts, err := w.hostContracts(ctx) @@ -960,7 +903,7 @@ func (w *Worker) UploadMultipartUploadPart(ctx context.Context, r io.Reader, buc } // attach gouging checker to the context - ctx = WithGougingChecker(ctx, w.bus, up.GougingParams) + ctx = gouging.WithChecker(ctx, w.bus, up.GougingParams) // prepare opts uploadOpts := []upload.Option{ @@ -1004,7 +947,7 @@ func (w *Worker) initAccounts(refillInterval time.Duration) (err error) { if w.accounts != nil { panic("priceTables already initialized") // developer error } - w.accounts, err = iworker.NewAccountManager(w.masterKey.DeriveAccountsKey(w.id), w.id, w.bus, w, w, w.bus, w.bus, w.bus, w.bus, refillInterval, w.logger.Desugar()) + w.accounts, err = accounts.NewManager(w.masterKey.DeriveAccountsKey(w.id), w.id, w.bus, w, w, w.bus, w.bus, w.bus, w.bus, refillInterval, w.logger.Desugar()) return err } From df9976d8ba4a10c8928621dd10e86f3abf05289d Mon Sep 17 00:00:00 2001 From: PJ Date: Tue, 10 Dec 2024 13:07:21 +0100 Subject: [PATCH 07/14] testing: fix TestDownloadAllHosts --- autopilot/migrator/migrator.go | 7 ++++++- cmd/renterd/config.go | 2 ++ config/config.go | 1 + internal/test/e2e/cluster.go | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) 
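The hunks below thread a new `AccountsRefillInterval` through `config.Migrator`, the defaults, the CLI flags and the e2e test config, and `New` now fails on a zero interval. A minimal sketch of a fully populated migrator config after this change; the interval value is illustrative, the remaining numbers mirror the defaults shown earlier in `cmd/renterd/config.go`:

```go
package main

import (
	"fmt"
	"time"

	"go.sia.tech/renterd/config"
)

func main() {
	// Example migrator configuration; the refill interval is an illustrative
	// value, the other fields follow the shipped defaults.
	cfg := config.Migrator{
		AccountsRefillInterval:   10 * time.Second,
		HealthCutoff:             0.75,
		ParallelSlabsPerWorker:   1,
		DownloadMaxOverdrive:     5,
		DownloadOverdriveTimeout: 3 * time.Second,
		UploadMaxOverdrive:       5,
		UploadOverdriveTimeout:   3 * time.Second,
	}
	fmt.Printf("%+v\n", cfg)
}
```

Callers are expected to set the interval explicitly, as the e2e cluster config now does, since a zero value is rejected by `New`.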
diff --git a/autopilot/migrator/migrator.go b/autopilot/migrator/migrator.go index 42117570f..b22ef70b1 100644 --- a/autopilot/migrator/migrator.go +++ b/autopilot/migrator/migrator.go @@ -2,6 +2,7 @@ package migrator import ( "context" + "fmt" "math" "net" "sort" @@ -122,6 +123,10 @@ type ( func New(ctx context.Context, cfg config.Migrator, masterKey utils.MasterKey, alerts alerts.Alerter, ss SlabStore, b Bus, logger *zap.Logger) (*migrator, error) { logger = logger.Named("migrator") + if cfg.AccountsRefillInterval == 0 { + return nil, fmt.Errorf("accounts refill interval must be set") + } + dialer := rhp.NewFallbackDialer(b, net.Dialer{}, logger) m := &migrator{ alerts: alerts, @@ -149,7 +154,7 @@ func New(ctx context.Context, cfg config.Migrator, masterKey utils.MasterKey, al logger: logger.Sugar(), } - mgr, err := accounts.NewManager(masterKey.DeriveAccountsKey("migrator"), "migrator", alerts, m, m, b, b, b, b, time.Minute, logger) + mgr, err := accounts.NewManager(masterKey.DeriveAccountsKey("migrator"), "migrator", alerts, m, m, b, b, b, b, cfg.AccountsRefillInterval, logger) if err != nil { return nil, err } diff --git a/cmd/renterd/config.go b/cmd/renterd/config.go index fa6c37652..2b4774fab 100644 --- a/cmd/renterd/config.go +++ b/cmd/renterd/config.go @@ -115,6 +115,7 @@ func defaultConfig() config.Config { ScannerNumThreads: 10, }, Migrator: config.Migrator{ + AccountsRefillInterval: defaultAccountRefillInterval, HealthCutoff: 0.75, ParallelSlabsPerWorker: 1, DownloadMaxOverdrive: 5, @@ -301,6 +302,7 @@ func parseCLIFlags(cfg *config.Config) { flag.DurationVar(&cfg.ShutdownTimeout, "node.shutdownTimeout", cfg.ShutdownTimeout, "Timeout for node shutdown") // migrator + flag.DurationVar(&cfg.Migrator.AccountsRefillInterval, "migrator.accountRefillInterval", cfg.Migrator.AccountsRefillInterval, "Interval for refilling migrator' account balances") flag.Float64Var(&cfg.Migrator.HealthCutoff, "migrator.healthCutoff", cfg.Migrator.HealthCutoff, "Threshold for migrating slabs based on health") flag.Uint64Var(&cfg.Migrator.ParallelSlabsPerWorker, "migrator.parallelSlabsPerWorker", cfg.Migrator.ParallelSlabsPerWorker, "Parallel slab migrations per worker (overrides with RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER)") flag.Uint64Var(&cfg.Migrator.DownloadMaxOverdrive, "migrator.downloadMaxOverdrive", cfg.Migrator.DownloadMaxOverdrive, "Max overdrive workers for migration downloads") diff --git a/config/config.go b/config/config.go index 40b77ce09..5b4fd0147 100644 --- a/config/config.go +++ b/config/config.go @@ -141,6 +141,7 @@ type ( // Migrator contains the configuration for a migrator. 
Migrator struct { + AccountsRefillInterval time.Duration `yaml:"accountsRefillInterval,omitempty"` DownloadMaxOverdrive uint64 `yaml:"downloadMaxOverdrive,omitempty"` DownloadOverdriveTimeout time.Duration `yaml:"downloadOverdriveTimeout,omitempty"` HealthCutoff float64 `yaml:"healthCutoff,omitempty"` diff --git a/internal/test/e2e/cluster.go b/internal/test/e2e/cluster.go index 8b947c0a8..7fef4d7fa 100644 --- a/internal/test/e2e/cluster.go +++ b/internal/test/e2e/cluster.go @@ -1014,6 +1014,7 @@ func testApCfg() config.Autopilot { func testMigratorCfg() config.Migrator { return config.Migrator{ + AccountsRefillInterval: 10 * time.Millisecond, HealthCutoff: 0.99, ParallelSlabsPerWorker: 1, DownloadMaxOverdrive: 5, From a933e31b7bb65389c5b26132031e35c812d260ef Mon Sep 17 00:00:00 2001 From: PJ Date: Tue, 10 Dec 2024 13:14:01 +0100 Subject: [PATCH 08/14] docs: add changelog --- .changeset/move_migrations_to_the_autopilot.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/move_migrations_to_the_autopilot.md diff --git a/.changeset/move_migrations_to_the_autopilot.md b/.changeset/move_migrations_to_the_autopilot.md new file mode 100644 index 000000000..3593ee723 --- /dev/null +++ b/.changeset/move_migrations_to_the_autopilot.md @@ -0,0 +1,5 @@ +--- +default: major +--- + +# Move migrations to the autopilot From 8aa2f272f875d4bb9433e6e17e2a25fa2f0be8b8 Mon Sep 17 00:00:00 2001 From: PJ Date: Tue, 10 Dec 2024 13:32:10 +0100 Subject: [PATCH 09/14] worker: remove migrate slab from worker API --- openapi.yml | 29 ----------------------------- worker/client/client.go | 6 ------ 2 files changed, 35 deletions(-) diff --git a/openapi.yml b/openapi.yml index f82156e8b..277d40897 100644 --- a/openapi.yml +++ b/openapi.yml @@ -581,35 +581,6 @@ paths: schema: type: string - /worker/slab/migrate: - post: - summary: Migrate a slab - description: Migrates a slab from one host to another. - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/Slab" - responses: - "200": - description: Successfully migrated slab - content: - text/plain: - schema: - type: string - "500": - description: Internal server error - content: - text/plain: - schema: - type: string - "503": - description: Consensus isn't synced - content: - text/plain: - schema: - type: string - /worker/state: get: summary: Get the worker's state. diff --git a/worker/client/client.go b/worker/client/client.go index d507965e9..c74785e06 100644 --- a/worker/client/client.go +++ b/worker/client/client.go @@ -15,7 +15,6 @@ import ( "go.sia.tech/jape" "go.sia.tech/renterd/api" "go.sia.tech/renterd/internal/utils" - "go.sia.tech/renterd/object" ) // A Client provides methods for interacting with a worker. @@ -148,11 +147,6 @@ func (c *Client) Memory(ctx context.Context) (resp api.MemoryResponse, err error return } -// MigrateSlab migrates the specified slab. -func (c *Client) MigrateSlab(ctx context.Context, slab object.Slab) error { - return c.c.WithContext(ctx).POST("/slab/migrate", slab, nil) -} - // RemoveObjects removes the object with given prefix. 
func (c *Client) RemoveObjects(ctx context.Context, bucket, prefix string) (err error) { err = c.c.WithContext(ctx).POST("/objects/remove", api.ObjectsRemoveRequest{ From b9cffadaaa559ae8d70dd8dd02e2084d877bb126 Mon Sep 17 00:00:00 2001 From: PJ Date: Tue, 10 Dec 2024 15:01:46 +0100 Subject: [PATCH 10/14] internal: deduplicate spending and host manager --- autopilot/migrator/accounts.go | 4 +- autopilot/migrator/migrator.go | 47 +-- autopilot/migrator/spending.go | 121 ------ {worker => internal/contracts}/spending.go | 30 +- internal/download/downloadmanager.go | 6 +- internal/host/host.go | 6 - .../hosts.go => internal/hosts/manager.go | 91 ++++- internal/upload/uploader/uploader.go | 5 +- internal/upload/uploadmanager.go | 6 +- internal/upload/uploadmanager_test.go | 6 +- worker/host.go | 369 ------------------ worker/host_test.go | 4 +- worker/worker.go | 44 +-- 13 files changed, 134 insertions(+), 605 deletions(-) delete mode 100644 autopilot/migrator/spending.go rename {worker => internal/contracts}/spending.go (88%) rename autopilot/migrator/hosts.go => internal/hosts/manager.go (86%) delete mode 100644 worker/host.go diff --git a/autopilot/migrator/accounts.go b/autopilot/migrator/accounts.go index 3db639189..80ee08339 100644 --- a/autopilot/migrator/accounts.go +++ b/autopilot/migrator/accounts.go @@ -65,7 +65,7 @@ func (m *migrator) SyncAccount(ctx context.Context, fcid types.FileContractID, h ctx = gouging.WithChecker(ctx, m.bus, gp) // sync the account - h := m.Host(host.PublicKey, fcid, host.SiamuxAddr) + h := m.hostManager.Host(host.PublicKey, fcid, host.SiamuxAddr) err = m.withRevision(ctx, fcid, host.PublicKey, host.SiamuxAddr, defaultRevisionFetchTimeout, lockingPrioritySyncing, func(rev types.FileContractRevision) error { return h.SyncAccount(ctx, &rev) }) @@ -83,7 +83,7 @@ func (m *migrator) withRevision(ctx context.Context, fcid types.FileContractID, defer cancel() } - rev, err := m.Host(hk, fcid, siamuxAddr).FetchRevision(ctx, fcid) + rev, err := m.hostManager.Host(hk, fcid, siamuxAddr).FetchRevision(ctx, fcid) if err != nil { return err } diff --git a/autopilot/migrator/migrator.go b/autopilot/migrator/migrator.go index b22ef70b1..296c3801a 100644 --- a/autopilot/migrator/migrator.go +++ b/autopilot/migrator/migrator.go @@ -15,11 +15,11 @@ import ( "go.sia.tech/renterd/api" "go.sia.tech/renterd/config" "go.sia.tech/renterd/internal/accounts" + "go.sia.tech/renterd/internal/contracts" "go.sia.tech/renterd/internal/download" + "go.sia.tech/renterd/internal/hosts" "go.sia.tech/renterd/internal/memory" - "go.sia.tech/renterd/internal/prices" "go.sia.tech/renterd/internal/rhp" - rhp3 "go.sia.tech/renterd/internal/rhp/v3" rhp4 "go.sia.tech/renterd/internal/rhp/v4" "go.sia.tech/renterd/internal/upload" "go.sia.tech/renterd/internal/utils" @@ -92,18 +92,13 @@ type ( masterKey utils.MasterKey - contractSpendingRecorder *contractSpendingRecorder - + accounts *accounts.Manager downloadManager *download.Manager uploadManager *upload.Manager + hostManager hosts.Manager - rhp3Client *rhp3.Client rhp4Client *rhp4.Client - accounts *accounts.Manager - priceTables *prices.PriceTables - pricesCache *prices.PricesCache - signalConsensusNotSynced chan struct{} signalMaintenanceFinished chan struct{} @@ -127,7 +122,6 @@ func New(ctx context.Context, cfg config.Migrator, masterKey utils.MasterKey, al return nil, fmt.Errorf("accounts refill interval must be set") } - dialer := rhp.NewFallbackDialer(b, net.Dialer{}, logger) m := &migrator{ alerts: alerts, bus: b, @@ -138,12 +132,6 @@ func 
New(ctx context.Context, cfg config.Migrator, masterKey utils.MasterKey, al masterKey: masterKey, - rhp3Client: rhp3.New(dialer, logger), - rhp4Client: rhp4.New(dialer), - - priceTables: prices.NewPriceTables(), - pricesCache: prices.NewPricesCache(), - signalConsensusNotSynced: make(chan struct{}, 1), signalMaintenanceFinished: make(chan struct{}, 1), @@ -154,26 +142,25 @@ func New(ctx context.Context, cfg config.Migrator, masterKey utils.MasterKey, al logger: logger.Sugar(), } - mgr, err := accounts.NewManager(masterKey.DeriveAccountsKey("migrator"), "migrator", alerts, m, m, b, b, b, b, cfg.AccountsRefillInterval, logger) + // create account manager + am, err := accounts.NewManager(masterKey.DeriveAccountsKey("migrator"), "migrator", alerts, m, m, b, b, b, b, cfg.AccountsRefillInterval, logger) if err != nil { return nil, err } - m.accounts = mgr + m.accounts = am + + // create host manager + dialer := rhp.NewFallbackDialer(b, net.Dialer{}, logger) + csr := contracts.NewSpendingRecorder(ctx, b, 5*time.Second, logger) + hm := hosts.NewManager(masterKey, am, csr, dialer, logger) + m.hostManager = hm + m.rhp4Client = rhp4.New(dialer) + // create upload & download manager mm := memory.NewManager(math.MaxInt64, logger) uk := masterKey.DeriveUploadKey() - m.downloadManager = download.NewManager(ctx, &uk, m, mm, b, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, logger) - m.uploadManager = upload.NewManager(ctx, &uk, m, mm, b, b, b, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, logger) - - m.contractSpendingRecorder = &contractSpendingRecorder{ - bus: b, - logger: logger.Named("spending").Sugar(), - - flushCtx: ctx, - flushInterval: 5 * time.Second, // TODO: can be removed once we've moved it to the bus - - contractSpendings: make(map[types.FileContractID]api.ContractSpendingRecord), - } + m.downloadManager = download.NewManager(ctx, &uk, hm, mm, b, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, logger) + m.uploadManager = upload.NewManager(ctx, &uk, hm, mm, b, b, b, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, logger) return m, nil } diff --git a/autopilot/migrator/spending.go b/autopilot/migrator/spending.go deleted file mode 100644 index d4c96cf74..000000000 --- a/autopilot/migrator/spending.go +++ /dev/null @@ -1,121 +0,0 @@ -package migrator - -import ( - "context" - "fmt" - "sync" - "time" - - "go.sia.tech/core/types" - rhp "go.sia.tech/coreutils/rhp/v4" - "go.sia.tech/renterd/api" - "go.uber.org/zap" -) - -type ( - ContractSpendingRecorder interface { - RecordV1(types.FileContractRevision, api.ContractSpending) - RecordV2(rhp.ContractRevision, api.ContractSpending) - Stop(context.Context) - } - - contractSpendingRecorder struct { - flushInterval time.Duration - - bus Bus - logger *zap.SugaredLogger - - mu sync.Mutex - contractSpendings map[types.FileContractID]api.ContractSpendingRecord - - flushCtx context.Context - flushTimer *time.Timer - } -) - -var ( - _ ContractSpendingRecorder = (*contractSpendingRecorder)(nil) -) - -// RecordV1 stores the given contract spending record until it gets flushed to the bus. -func (r *contractSpendingRecorder) RecordV1(rev types.FileContractRevision, cs api.ContractSpending) { - r.record(rev.ParentID, rev.RevisionNumber, rev.Filesize, rev.ValidRenterPayout(), rev.MissedHostPayout(), cs) -} - -// RecordV2 stores the given contract spending record until it gets flushed to the bus. 
-func (r *contractSpendingRecorder) RecordV2(rev rhp.ContractRevision, cs api.ContractSpending) { - r.record(rev.ID, rev.Revision.RevisionNumber, rev.Revision.Filesize, rev.Revision.RenterOutput.Value, rev.Revision.HostOutput.Value, cs) -} - -// Stop stops the flush timer and flushes one last time. -func (r *contractSpendingRecorder) Stop(ctx context.Context) { - // stop the flush timer - r.mu.Lock() - if r.flushTimer != nil { - r.flushTimer.Stop() - } - r.flushCtx = ctx - r.mu.Unlock() - - // flush all interactions - r.flush() - - // log if we weren't able to flush them - r.mu.Lock() - if len(r.contractSpendings) > 0 { - r.logger.Errorw(fmt.Sprintf("failed to record %d contract spendings on worker shutdown", len(r.contractSpendings))) - } - r.mu.Unlock() -} - -func (r *contractSpendingRecorder) flush() { - r.mu.Lock() - defer r.mu.Unlock() - - // NOTE: don't bother flushing if the context is cancelled, we can safely - // ignore the buffered records since we'll flush on shutdown and log in case - // we weren't able to flush all spendings o the bus - select { - case <-r.flushCtx.Done(): - r.flushTimer = nil - return - default: - } - - if len(r.contractSpendings) > 0 { - records := make([]api.ContractSpendingRecord, 0, len(r.contractSpendings)) - for _, cs := range r.contractSpendings { - records = append(records, cs) - } - if err := r.bus.RecordContractSpending(r.flushCtx, records); err != nil { - r.logger.Errorw(fmt.Sprintf("failed to record contract spending: %v", err)) - } else { - r.contractSpendings = make(map[types.FileContractID]api.ContractSpendingRecord) - } - } - r.flushTimer = nil -} - -func (r *contractSpendingRecorder) record(fcid types.FileContractID, revisionNumber, size uint64, validRenterPayout, missedHostPayout types.Currency, cs api.ContractSpending) { - r.mu.Lock() - defer r.mu.Unlock() - - // record the spending - csr, found := r.contractSpendings[fcid] - if !found { - csr = api.ContractSpendingRecord{ContractID: fcid} - } - csr.ContractSpending = csr.ContractSpending.Add(cs) - if revisionNumber > csr.RevisionNumber { - csr.RevisionNumber = revisionNumber - csr.Size = size - csr.ValidRenterPayout = validRenterPayout - csr.MissedHostPayout = missedHostPayout - } - r.contractSpendings[fcid] = csr - - // schedule flush - if r.flushTimer == nil { - r.flushTimer = time.AfterFunc(r.flushInterval, r.flush) - } -} diff --git a/worker/spending.go b/internal/contracts/spending.go similarity index 88% rename from worker/spending.go rename to internal/contracts/spending.go index 8116def7c..f730a381e 100644 --- a/worker/spending.go +++ b/internal/contracts/spending.go @@ -1,4 +1,4 @@ -package worker +package contracts import ( "context" @@ -12,8 +12,16 @@ import ( "go.uber.org/zap" ) +var ( + _ SpendingRecorder = (*contractSpendingRecorder)(nil) +) + type ( - ContractSpendingRecorder interface { + Bus interface { + RecordContractSpending(ctx context.Context, records []api.ContractSpendingRecord) error + } + + SpendingRecorder interface { RecordV1(types.FileContractRevision, api.ContractSpending) RecordV2(rhp.ContractRevision, api.ContractSpending) Stop(context.Context) @@ -33,19 +41,13 @@ type ( } ) -var ( - _ ContractSpendingRecorder = (*contractSpendingRecorder)(nil) -) - -func (w *Worker) initContractSpendingRecorder(flushInterval time.Duration) { - if w.contractSpendingRecorder != nil { - panic("ContractSpendingRecorder already initialized") // developer error - } - w.contractSpendingRecorder = &contractSpendingRecorder{ - bus: w.bus, - logger: w.logger, +func 
NewSpendingRecorder(ctx context.Context, b Bus, flushInterval time.Duration, logger *zap.Logger) SpendingRecorder { + logger = logger.Named("spending") + return &contractSpendingRecorder{ + bus: b, + logger: logger.Sugar(), - flushCtx: w.shutdownCtx, + flushCtx: ctx, flushInterval: flushInterval, contractSpendings: make(map[types.FileContractID]api.ContractSpendingRecord), diff --git a/internal/download/downloadmanager.go b/internal/download/downloadmanager.go index f00cb4f0d..5201f74f8 100644 --- a/internal/download/downloadmanager.go +++ b/internal/download/downloadmanager.go @@ -15,7 +15,7 @@ import ( "go.sia.tech/core/types" "go.sia.tech/renterd/api" "go.sia.tech/renterd/internal/download/downloader" - "go.sia.tech/renterd/internal/host" + "go.sia.tech/renterd/internal/hosts" "go.sia.tech/renterd/internal/memory" rhp3 "go.sia.tech/renterd/internal/rhp/v3" "go.sia.tech/renterd/internal/utils" @@ -43,7 +43,7 @@ var ( type ( Manager struct { - hm host.HostManager + hm hosts.Manager mm memory.MemoryManager os ObjectStore uploadKey *utils.UploadKey @@ -115,7 +115,7 @@ func (s *sectorInfo) selectHost(h types.PublicKey) { } } -func NewManager(ctx context.Context, uploadKey *utils.UploadKey, hm host.HostManager, mm memory.MemoryManager, os ObjectStore, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *Manager { +func NewManager(ctx context.Context, uploadKey *utils.UploadKey, hm hosts.Manager, mm memory.MemoryManager, os ObjectStore, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *Manager { logger = logger.Named("downloadmanager") return &Manager{ hm: hm, diff --git a/internal/host/host.go b/internal/host/host.go index 307108453..3e573ebbb 100644 --- a/internal/host/host.go +++ b/internal/host/host.go @@ -29,10 +29,4 @@ type ( FundAccount(ctx context.Context, balance types.Currency, rev *types.FileContractRevision) error SyncAccount(ctx context.Context, rev *types.FileContractRevision) error } - - HostManager interface { - Downloader(hi api.HostInfo) Downloader - Uploader(hi api.HostInfo, fcid types.FileContractID) Uploader - Host(hk types.PublicKey, fcid types.FileContractID, siamuxAddr string) Host - } ) diff --git a/autopilot/migrator/hosts.go b/internal/hosts/manager.go similarity index 86% rename from autopilot/migrator/hosts.go rename to internal/hosts/manager.go index 9504c63a2..67d090061 100644 --- a/autopilot/migrator/hosts.go +++ b/internal/hosts/manager.go @@ -1,35 +1,74 @@ -package migrator +package hosts import ( "context" "fmt" "io" "math" + "net" - rhpv2 "go.sia.tech/core/rhp/v2" - rhpv3 "go.sia.tech/core/rhp/v3" - rhpv4 "go.sia.tech/core/rhp/v4" "go.sia.tech/core/types" - rhp "go.sia.tech/coreutils/rhp/v4" "go.sia.tech/renterd/api" "go.sia.tech/renterd/internal/accounts" + "go.sia.tech/renterd/internal/contracts" "go.sia.tech/renterd/internal/gouging" "go.sia.tech/renterd/internal/host" "go.sia.tech/renterd/internal/prices" - rhp3 "go.sia.tech/renterd/internal/rhp/v3" - rhp4 "go.sia.tech/renterd/internal/rhp/v4" "go.sia.tech/renterd/internal/utils" "go.uber.org/zap" + + rhpv2 "go.sia.tech/core/rhp/v2" + rhpv3 "go.sia.tech/core/rhp/v3" + rhpv4 "go.sia.tech/core/rhp/v4" + + rhp3 "go.sia.tech/renterd/internal/rhp/v3" + rhp4 "go.sia.tech/renterd/internal/rhp/v4" + + rhp "go.sia.tech/coreutils/rhp/v4" +) + +var ( + _ host.Host = (*hostClient)(nil) + _ Manager = (*hostManager)(nil) ) type ( + AccountStore interface { + ForHost(pk types.PublicKey) *accounts.Account + } + + Dialer interface { + Dial(ctx context.Context, hk 
types.PublicKey, address string) (net.Conn, error) + } + + Manager interface { + Downloader(hi api.HostInfo) host.Downloader + Uploader(hi api.HostInfo, fcid types.FileContractID) host.Uploader + Host(hk types.PublicKey, fcid types.FileContractID, siamuxAddr string) host.Host + } +) + +type ( + hostManager struct { + masterKey utils.MasterKey + + rhp3Client *rhp3.Client + rhp4Client *rhp4.Client + + accounts AccountStore + contracts contracts.SpendingRecorder + priceTables *prices.PriceTables + pricesCache *prices.PricesCache + logger *zap.SugaredLogger + } + hostClient struct { hk types.PublicKey renterKey types.PrivateKey siamuxAddr string acc *accounts.Account - csr ContractSpendingRecorder + csr contracts.SpendingRecorder pts *prices.PriceTables rhp3 *rhp3.Client logger *zap.SugaredLogger @@ -55,7 +94,7 @@ type ( rk types.PrivateKey acc *accounts.Account - csr ContractSpendingRecorder + csr contracts.SpendingRecorder pts *prices.PriceTables rhp3 *rhp3.Client } @@ -66,23 +105,35 @@ type ( rk types.PrivateKey acc *accounts.Account - csr ContractSpendingRecorder + csr contracts.SpendingRecorder pts *prices.PricesCache rhp4 *rhp4.Client } ) -var ( - _ host.Host = (*hostClient)(nil) - _ host.HostManager = (*migrator)(nil) -) +func NewManager(masterKey utils.MasterKey, as AccountStore, csr contracts.SpendingRecorder, dialer Dialer, logger *zap.Logger) Manager { + logger = logger.Named("hostmanager") + return &hostManager{ + masterKey: masterKey, + + rhp3Client: rhp3.New(dialer, logger), + rhp4Client: rhp4.New(dialer), + + accounts: as, + contracts: csr, + priceTables: prices.NewPriceTables(), + pricesCache: prices.NewPricesCache(), + + logger: logger.Sugar(), + } +} -func (m *migrator) Host(hk types.PublicKey, fcid types.FileContractID, siamuxAddr string) host.Host { +func (m *hostManager) Host(hk types.PublicKey, fcid types.FileContractID, siamuxAddr string) host.Host { return &hostClient{ rhp3: m.rhp3Client, hk: hk, acc: m.accounts.ForHost(hk), - csr: m.contractSpendingRecorder, + csr: m.contracts, logger: m.logger.Named(hk.String()[:4]), siamuxAddr: siamuxAddr, renterKey: m.masterKey.DeriveContractKey(hk), @@ -90,7 +141,7 @@ func (m *migrator) Host(hk types.PublicKey, fcid types.FileContractID, siamuxAdd } } -func (m *migrator) Downloader(hi api.HostInfo) host.Downloader { +func (m *hostManager) Downloader(hi api.HostInfo) host.Downloader { if hi.IsV2() { return &hostV2DownloadClient{ hi: hi, @@ -107,7 +158,7 @@ func (m *migrator) Downloader(hi api.HostInfo) host.Downloader { } } -func (m *migrator) Uploader(hi api.HostInfo, fcid types.FileContractID) host.Uploader { +func (m *hostManager) Uploader(hi api.HostInfo, fcid types.FileContractID) host.Uploader { if hi.IsV2() { return &hostV2UploadClient{ fcid: fcid, @@ -115,7 +166,7 @@ func (m *migrator) Uploader(hi api.HostInfo, fcid types.FileContractID) host.Upl rk: m.masterKey.DeriveContractKey(hi.PublicKey), acc: m.accounts.ForHost(hi.PublicKey), - csr: m.contractSpendingRecorder, + csr: m.contracts, pts: m.pricesCache, rhp4: m.rhp4Client, } @@ -126,7 +177,7 @@ func (m *migrator) Uploader(hi api.HostInfo, fcid types.FileContractID) host.Upl rk: m.masterKey.DeriveContractKey(hi.PublicKey), acc: m.accounts.ForHost(hi.PublicKey), - csr: m.contractSpendingRecorder, + csr: m.contracts, pts: m.priceTables, rhp3: m.rhp3Client, } diff --git a/internal/upload/uploader/uploader.go b/internal/upload/uploader/uploader.go index 9b79070f6..61f2bc904 100644 --- a/internal/upload/uploader/uploader.go +++ b/internal/upload/uploader/uploader.go @@ -11,6 
+11,7 @@ import ( "go.sia.tech/core/types" "go.sia.tech/renterd/api" "go.sia.tech/renterd/internal/host" + "go.sia.tech/renterd/internal/hosts" "go.sia.tech/renterd/internal/locking" rhp3 "go.sia.tech/renterd/internal/rhp/v3" "go.sia.tech/renterd/internal/utils" @@ -72,7 +73,7 @@ type ( Uploader struct { cs ContractStore cl locking.ContractLocker - hm host.HostManager + hm hosts.Manager logger *zap.SugaredLogger hk types.PublicKey @@ -95,7 +96,7 @@ type ( } ) -func New(ctx context.Context, cl locking.ContractLocker, cs ContractStore, hm host.HostManager, hi api.HostInfo, fcid types.FileContractID, endHeight uint64, l *zap.SugaredLogger) *Uploader { +func New(ctx context.Context, cl locking.ContractLocker, cs ContractStore, hm hosts.Manager, hi api.HostInfo, fcid types.FileContractID, endHeight uint64, l *zap.SugaredLogger) *Uploader { return &Uploader{ cl: cl, cs: cs, diff --git a/internal/upload/uploadmanager.go b/internal/upload/uploadmanager.go index feca91886..130be696e 100644 --- a/internal/upload/uploadmanager.go +++ b/internal/upload/uploadmanager.go @@ -16,7 +16,7 @@ import ( "go.sia.tech/core/types" "go.sia.tech/renterd/api" - "go.sia.tech/renterd/internal/host" + "go.sia.tech/renterd/internal/hosts" "go.sia.tech/renterd/internal/memory" "go.sia.tech/renterd/internal/upload/uploader" "go.sia.tech/renterd/internal/utils" @@ -63,7 +63,7 @@ type ( } Manager struct { - hm host.HostManager + hm hosts.Manager mm memory.MemoryManager os ObjectStore cl ContractLocker @@ -150,7 +150,7 @@ type ( } ) -func NewManager(ctx context.Context, uploadKey *utils.UploadKey, hm host.HostManager, mm memory.MemoryManager, os ObjectStore, cl ContractLocker, cs uploader.ContractStore, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *Manager { +func NewManager(ctx context.Context, uploadKey *utils.UploadKey, hm hosts.Manager, mm memory.MemoryManager, os ObjectStore, cl ContractLocker, cs uploader.ContractStore, maxOverdrive uint64, overdriveTimeout time.Duration, logger *zap.Logger) *Manager { logger = logger.Named("uploadmanager") return &Manager{ hm: hm, diff --git a/internal/upload/uploadmanager_test.go b/internal/upload/uploadmanager_test.go index 4447ec609..043d6114b 100644 --- a/internal/upload/uploadmanager_test.go +++ b/internal/upload/uploadmanager_test.go @@ -12,8 +12,10 @@ import ( type hostManager struct{} -func (hm *hostManager) Downloader(hi api.HostInfo) host.Downloader { return nil } -func (hm *hostManager) Uploader(hi api.HostInfo, fcid types.FileContractID) host.Uploader { return nil } +func (hm *hostManager) Downloader(hi api.HostInfo) host.Downloader { return nil } +func (hm *hostManager) Uploader(hi api.HostInfo, fcid types.FileContractID) host.Uploader { + return nil +} func (hm *hostManager) Host(hk types.PublicKey, fcid types.FileContractID, siamuxAddr string) host.Host { return nil } diff --git a/worker/host.go b/worker/host.go deleted file mode 100644 index 4064f4083..000000000 --- a/worker/host.go +++ /dev/null @@ -1,369 +0,0 @@ -package worker - -import ( - "context" - "fmt" - "io" - "math" - - rhpv2 "go.sia.tech/core/rhp/v2" - rhpv3 "go.sia.tech/core/rhp/v3" - rhpv4 "go.sia.tech/core/rhp/v4" - "go.sia.tech/core/types" - rhp "go.sia.tech/coreutils/rhp/v4" - "go.sia.tech/renterd/api" - "go.sia.tech/renterd/internal/accounts" - "go.sia.tech/renterd/internal/gouging" - "go.sia.tech/renterd/internal/host" - "go.sia.tech/renterd/internal/prices" - rhp3 "go.sia.tech/renterd/internal/rhp/v3" - rhp4 "go.sia.tech/renterd/internal/rhp/v4" - 
"go.sia.tech/renterd/internal/utils" - "go.uber.org/zap" -) - -type ( - hostClient struct { - hk types.PublicKey - renterKey types.PrivateKey - siamuxAddr string - - acc *accounts.Account - csr ContractSpendingRecorder - pts *prices.PriceTables - rhp3 *rhp3.Client - logger *zap.SugaredLogger - } - - hostDownloadClient struct { - hi api.HostInfo - acc *accounts.Account - pts *prices.PriceTables - rhp3 *rhp3.Client - } - - hostV2DownloadClient struct { - hi api.HostInfo - acc *accounts.Account - pts *prices.PricesCache - rhp4 *rhp4.Client - } - - hostUploadClient struct { - fcid types.FileContractID - hi api.HostInfo - rk types.PrivateKey - - acc *accounts.Account - csr ContractSpendingRecorder - pts *prices.PriceTables - rhp3 *rhp3.Client - } - - hostV2UploadClient struct { - fcid types.FileContractID - hi api.HostInfo - rk types.PrivateKey - - acc *accounts.Account - csr ContractSpendingRecorder - pts *prices.PricesCache - rhp4 *rhp4.Client - } -) - -var ( - _ host.Host = (*hostClient)(nil) - _ host.HostManager = (*Worker)(nil) -) - -func (w *Worker) Host(hk types.PublicKey, fcid types.FileContractID, siamuxAddr string) host.Host { - return &hostClient{ - rhp3: w.rhp3Client, - hk: hk, - acc: w.accounts.ForHost(hk), - csr: w.contractSpendingRecorder, - logger: w.logger.Named(hk.String()[:4]), - siamuxAddr: siamuxAddr, - renterKey: w.deriveRenterKey(hk), - pts: w.priceTables, - } -} - -func (w *Worker) Downloader(hi api.HostInfo) host.Downloader { - if hi.IsV2() { - return &hostV2DownloadClient{ - hi: hi, - acc: w.accounts.ForHost(hi.PublicKey), - pts: w.pricesCache, - rhp4: w.rhp4Client, - } - } - return &hostDownloadClient{ - hi: hi, - acc: w.accounts.ForHost(hi.PublicKey), - pts: w.priceTables, - rhp3: w.rhp3Client, - } -} - -func (w *Worker) Uploader(hi api.HostInfo, fcid types.FileContractID) host.Uploader { - if hi.IsV2() { - return &hostV2UploadClient{ - fcid: fcid, - hi: hi, - rk: w.deriveRenterKey(hi.PublicKey), - - acc: w.accounts.ForHost(hi.PublicKey), - csr: w.contractSpendingRecorder, - pts: w.pricesCache, - rhp4: w.rhp4Client, - } - } - return &hostUploadClient{ - fcid: fcid, - hi: hi, - rk: w.deriveRenterKey(hi.PublicKey), - - acc: w.accounts.ForHost(hi.PublicKey), - csr: w.contractSpendingRecorder, - pts: w.priceTables, - rhp3: w.rhp3Client, - } -} - -func (c *hostClient) PublicKey() types.PublicKey { return c.hk } -func (c *hostDownloadClient) PublicKey() types.PublicKey { return c.hi.PublicKey } -func (c *hostV2DownloadClient) PublicKey() types.PublicKey { return c.hi.PublicKey } -func (c *hostUploadClient) PublicKey() types.PublicKey { return c.hi.PublicKey } -func (c *hostV2UploadClient) PublicKey() types.PublicKey { return c.hi.PublicKey } - -func (h *hostClient) PriceTableUnpaid(ctx context.Context) (api.HostPriceTable, error) { - return h.rhp3.PriceTableUnpaid(ctx, h.hk, h.siamuxAddr) -} - -func (h *hostClient) PriceTable(ctx context.Context, rev *types.FileContractRevision) (hpt api.HostPriceTable, cost types.Currency, err error) { - // fetchPT is a helper function that performs the RPC given a payment function - fetchPT := func(paymentFn rhp3.PriceTablePaymentFunc) (api.HostPriceTable, error) { - return h.rhp3.PriceTable(ctx, h.hk, h.siamuxAddr, paymentFn) - } - - // fetch the price table - if rev != nil { - hpt, err = fetchPT(rhp3.PreparePriceTableContractPayment(rev, h.acc.ID(), h.renterKey)) - } else { - hpt, err = fetchPT(rhp3.PreparePriceTableAccountPayment(h.acc.Key())) - } - - // set the cost - if err == nil { - cost = hpt.UpdatePriceTableCost - } - return -} 
- -// FetchRevision tries to fetch a contract revision from the host. -func (h *hostClient) FetchRevision(ctx context.Context, fcid types.FileContractID) (types.FileContractRevision, error) { - return h.rhp3.Revision(ctx, fcid, h.hk, h.siamuxAddr) -} - -func (h *hostClient) FundAccount(ctx context.Context, desired types.Currency, rev *types.FileContractRevision) error { - log := h.logger.With( - zap.Stringer("host", h.hk), - zap.Stringer("account", h.acc.ID()), - ) - - // ensure we have at least 2H in the contract to cover the costs - if types.NewCurrency64(2).Cmp(rev.ValidRenterPayout()) >= 0 { - return fmt.Errorf("insufficient funds to fund account: %v <= %v", rev.ValidRenterPayout(), types.NewCurrency64(2)) - } - - // calculate the deposit amount - return h.acc.WithDeposit(func(balance types.Currency) (types.Currency, error) { - // return early if we have the desired balance - if balance.Cmp(desired) >= 0 { - return types.ZeroCurrency, nil - } - deposit := desired.Sub(balance) - - // fetch pricetable directly to bypass the gouging check - pt, _, err := h.pts.Fetch(ctx, h, rev) - if err != nil { - return types.ZeroCurrency, err - } - - // cap the deposit by what's left in the contract - cost := types.NewCurrency64(1) - availableFunds := rev.ValidRenterPayout().Sub(cost) - if deposit.Cmp(availableFunds) > 0 { - deposit = availableFunds - } - - // fund the account - if err := h.rhp3.FundAccount(ctx, rev, h.hk, h.siamuxAddr, deposit, h.acc.ID(), pt.HostPriceTable, h.renterKey); err != nil { - if rhp3.IsBalanceMaxExceeded(err) { - h.acc.ScheduleSync() - } - return types.ZeroCurrency, fmt.Errorf("failed to fund account with %v; %w", deposit, err) - } - - // record the spend - h.csr.RecordV1(*rev, api.ContractSpending{FundAccount: deposit.Add(cost)}) - - // log the account balance after funding - log.Debugw("fund account succeeded", - "balance", balance.ExactString(), - "deposit", deposit.ExactString(), - ) - return deposit, nil - }) -} - -func (h *hostClient) SyncAccount(ctx context.Context, rev *types.FileContractRevision) error { - // fetch pricetable directly to bypass the gouging check - pt, _, err := h.pts.Fetch(ctx, h, rev) - if err != nil { - return err - } - - // check only the AccountBalanceCost - if types.NewCurrency64(1).Cmp(pt.AccountBalanceCost) < 0 { - return fmt.Errorf("%w: host is gouging on AccountBalanceCost", gouging.ErrPriceTableGouging) - } - - return h.acc.WithSync(func() (types.Currency, error) { - return h.rhp3.SyncAccount(ctx, rev, h.hk, h.siamuxAddr, h.acc.ID(), pt.HostPriceTable, h.renterKey) - }) -} - -func (c *hostDownloadClient) DownloadSector(ctx context.Context, w io.Writer, root types.Hash256, offset, length uint64) (err error) { - return c.acc.WithWithdrawal(func() (types.Currency, error) { - pt, ptc, err := c.pts.Fetch(ctx, c, nil) - if err != nil { - return types.ZeroCurrency, err - } - - cost, err := c.rhp3.ReadSector(ctx, offset, length, root, w, c.hi.PublicKey, c.hi.SiamuxAddr, c.acc.ID(), c.acc.Key(), pt.HostPriceTable) - if err != nil { - return ptc, err - } - return ptc.Add(cost), nil - }) -} - -func (c *hostDownloadClient) PriceTable(ctx context.Context, rev *types.FileContractRevision) (hpt api.HostPriceTable, cost types.Currency, err error) { - hpt, err = c.rhp3.PriceTable(ctx, c.hi.PublicKey, c.hi.SiamuxAddr, rhp3.PreparePriceTableAccountPayment(c.acc.Key())) - if err == nil { - cost = hpt.UpdatePriceTableCost - } - return -} - -func (c *hostV2DownloadClient) DownloadSector(ctx context.Context, w io.Writer, root types.Hash256, offset, length 
uint64) (err error) { - return c.acc.WithWithdrawal(func() (types.Currency, error) { - prices, err := c.pts.Fetch(ctx, c) - if err != nil { - return types.ZeroCurrency, err - } - - res, err := c.rhp4.ReadSector(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr(), prices, c.acc.Token(), w, root, offset, length) - if err != nil { - return types.ZeroCurrency, err - } - return res.Usage.RenterCost(), nil - }) -} - -func (c *hostV2DownloadClient) Prices(ctx context.Context) (rhpv4.HostPrices, error) { - settings, err := c.rhp4.Settings(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr()) - if err != nil { - return rhpv4.HostPrices{}, err - } - return settings.Prices, nil -} - -func (c *hostUploadClient) PriceTable(ctx context.Context, rev *types.FileContractRevision) (hpt api.HostPriceTable, cost types.Currency, err error) { - hpt, err = c.rhp3.PriceTable(ctx, c.hi.PublicKey, c.hi.SiamuxAddr, rhp3.PreparePriceTableAccountPayment(c.acc.Key())) - if err == nil { - cost = hpt.UpdatePriceTableCost - } - return -} - -func (c *hostUploadClient) UploadSector(ctx context.Context, sectorRoot types.Hash256, sector *[rhpv2.SectorSize]byte) error { - rev, err := c.rhp3.Revision(ctx, c.fcid, c.hi.PublicKey, c.hi.SiamuxAddr) - if err != nil { - return fmt.Errorf("%w; %w", rhp3.ErrFailedToFetchRevision, err) - } else if rev.RevisionNumber == math.MaxUint64 { - return rhp3.ErrMaxRevisionReached - } - - var hpt rhpv3.HostPriceTable - if err := c.acc.WithWithdrawal(func() (amount types.Currency, err error) { - pt, cost, err := c.pts.Fetch(ctx, c, nil) - if err != nil { - return types.ZeroCurrency, err - } - hpt = pt.HostPriceTable - - gc, err := gouging.CheckerFromContext(ctx) - if err != nil { - return cost, err - } - if breakdown := gc.CheckV1(nil, &pt.HostPriceTable); breakdown.Gouging() { - return cost, fmt.Errorf("%w: %v", gouging.ErrPriceTableGouging, breakdown) - } - return cost, nil - }); err != nil { - return err - } - - cost, err := c.rhp3.AppendSector(ctx, sectorRoot, sector, &rev, c.hi.PublicKey, c.hi.SiamuxAddr, c.acc.ID(), hpt, c.rk) - if err != nil { - return fmt.Errorf("failed to upload sector: %w", err) - } - - c.csr.RecordV1(rev, api.ContractSpending{Uploads: cost}) - return nil -} - -func (c *hostV2UploadClient) UploadSector(ctx context.Context, sectorRoot types.Hash256, sector *[rhpv2.SectorSize]byte) error { - fc, err := c.rhp4.LatestRevision(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr(), c.fcid) - if err != nil { - return err - } - - rev := rhp.ContractRevision{ - ID: c.fcid, - Revision: fc, - } - - return c.acc.WithWithdrawal(func() (types.Currency, error) { - prices, err := c.pts.Fetch(ctx, c) - if err != nil { - return types.ZeroCurrency, err - } - - res, err := c.rhp4.WriteSector(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr(), prices, c.acc.Token(), utils.NewReaderLen(sector[:]), rhpv2.SectorSize, api.BlocksPerDay*3) - if err != nil { - return types.ZeroCurrency, fmt.Errorf("failed to write sector: %w", err) - } - cost := res.Usage.RenterCost() - - res2, err := c.rhp4.AppendSectors(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr(), prices, c.rk, rev, []types.Hash256{res.Root}) - if err != nil { - return cost, fmt.Errorf("failed to write sector: %w", err) - } - - c.csr.RecordV2(rhp.ContractRevision{ID: rev.ID, Revision: res2.Revision}, api.ContractSpending{Uploads: res2.Usage.RenterCost()}) - return cost, nil - }) -} - -func (c *hostV2UploadClient) Prices(ctx context.Context) (rhpv4.HostPrices, error) { - settings, err := c.rhp4.Settings(ctx, c.hi.PublicKey, c.hi.V2SiamuxAddr()) - if err != nil { - return rhpv4.HostPrices{}, err 
- } - return settings.Prices, nil -} diff --git a/worker/host_test.go b/worker/host_test.go index 9a0fcee56..ff41bd6f4 100644 --- a/worker/host_test.go +++ b/worker/host_test.go @@ -117,7 +117,7 @@ func (h *testHost) PublicKey() types.PublicKey { } func (h *testHost) DownloadSector(ctx context.Context, w io.Writer, root types.Hash256, offset, length uint64) error { - sector, exist := h.Sector(root) + sector, exist := h.Contract.Sector(root) if !exist { return rhp3.ErrSectorNotFound } @@ -129,7 +129,7 @@ func (h *testHost) DownloadSector(ctx context.Context, w io.Writer, root types.H } func (h *testHost) UploadSector(ctx context.Context, sectorRoot types.Hash256, sector *[rhpv2.SectorSize]byte) error { - h.AddSector(sectorRoot, sector) + h.Contract.AddSector(sectorRoot, sector) if h.uploadDelay > 0 { select { case <-time.After(h.uploadDelay): diff --git a/worker/worker.go b/worker/worker.go index 7ac7702b4..29e8d569e 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -25,10 +25,11 @@ import ( "go.sia.tech/renterd/build" "go.sia.tech/renterd/config" "go.sia.tech/renterd/internal/accounts" + "go.sia.tech/renterd/internal/contracts" "go.sia.tech/renterd/internal/download" "go.sia.tech/renterd/internal/gouging" + "go.sia.tech/renterd/internal/hosts" "go.sia.tech/renterd/internal/memory" - "go.sia.tech/renterd/internal/prices" "go.sia.tech/renterd/internal/rhp" rhp2 "go.sia.tech/renterd/internal/rhp/v2" rhp3 "go.sia.tech/renterd/internal/rhp/v3" @@ -144,22 +145,6 @@ type ( } ) -// TODO: deriving the renter key from the host key using the master key only -// works if we persist a hash of the renter's master key in the database and -// compare it on startup, otherwise there's no way of knowing the derived key is -// usuable -// NOTE: Instead of hashing the masterkey and comparing, we could use random -// bytes + the HMAC thereof as the salt. e.g. 32 bytes + 32 bytes HMAC. Then -// whenever we read a specific salt we can verify that is was created with a -// given key. That would eventually allow different masterkeys to coexist in the -// same bus. -// -// TODO: instead of deriving a renter key use a randomly generated salt so we're -// not limited to one key per host -func (w *Worker) deriveRenterKey(hostKey types.PublicKey) types.PrivateKey { - return w.masterKey.DeriveContractKey(hostKey) -} - // A worker talks to Sia hosts to perform contract and storage operations within // a renterd system. 
type Worker struct { @@ -176,17 +161,15 @@ type Worker struct { downloadManager *download.Manager uploadManager *upload.Manager + hostManager hosts.Manager - accounts *accounts.Manager - dialer *rhp.FallbackDialer - cache iworker.WorkerCache - priceTables *prices.PriceTables - pricesCache *prices.PricesCache + accounts *accounts.Manager + cache iworker.WorkerCache uploadsMu sync.Mutex uploadingPackedSlabs map[string]struct{} - contractSpendingRecorder ContractSpendingRecorder + contractSpendingRecorder contracts.SpendingRecorder shutdownCtx context.Context shutdownCtxCancel context.CancelFunc @@ -211,7 +194,7 @@ func (w *Worker) withRevision(ctx context.Context, fcid types.FileContractID, hk defer cancel() } - rev, err := w.Host(hk, fcid, siamuxAddr).FetchRevision(ctx, fcid) + rev, err := w.hostManager.Host(hk, fcid, siamuxAddr).FetchRevision(ctx, fcid) if err != nil { return err } @@ -624,13 +607,10 @@ func New(cfg config.Worker, masterKey [32]byte, b Bus, l *zap.Logger) (*Worker, w := &Worker{ alerts: a, cache: iworker.NewCache(b, l), - dialer: dialer, id: cfg.ID, bus: b, masterKey: masterKey, logger: l.Sugar(), - priceTables: prices.NewPriceTables(), - pricesCache: prices.NewPricesCache(), rhp2Client: rhp2.New(dialer, l), rhp3Client: rhp3.New(dialer, l), rhp4Client: rhp4.New(dialer), @@ -646,13 +626,15 @@ func New(cfg config.Worker, masterKey [32]byte, b Bus, l *zap.Logger) (*Worker, uploadKey := w.masterKey.DeriveUploadKey() + w.contractSpendingRecorder = contracts.NewSpendingRecorder(w.shutdownCtx, w.bus, cfg.BusFlushInterval, l) + hm := hosts.NewManager(w.masterKey, w.accounts, w.contractSpendingRecorder, dialer, l) + dlmm := memory.NewManager(cfg.UploadMaxMemory, l.Named("uploadmanager")) - w.downloadManager = download.NewManager(w.shutdownCtx, &uploadKey, w, dlmm, w.bus, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) + w.downloadManager = download.NewManager(w.shutdownCtx, &uploadKey, hm, dlmm, w.bus, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) ulmm := memory.NewManager(cfg.UploadMaxMemory, l.Named("uploadmanager")) - w.uploadManager = upload.NewManager(w.shutdownCtx, &uploadKey, w, ulmm, w.bus, w.bus, w.bus, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) + w.uploadManager = upload.NewManager(w.shutdownCtx, &uploadKey, hm, ulmm, w.bus, w.bus, w.bus, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) - w.initContractSpendingRecorder(cfg.BusFlushInterval) return w, nil } @@ -844,7 +826,7 @@ func (w *Worker) SyncAccount(ctx context.Context, fcid types.FileContractID, hos ctx = gouging.WithChecker(ctx, w.bus, gp) // sync the account - h := w.Host(host.PublicKey, fcid, host.SiamuxAddr) + h := w.hostManager.Host(host.PublicKey, fcid, host.SiamuxAddr) err = w.withRevision(ctx, fcid, host.PublicKey, host.SiamuxAddr, defaultRevisionFetchTimeout, lockingPrioritySyncing, func(rev types.FileContractRevision) error { return h.SyncAccount(ctx, &rev) }) From 738cd3fb12661ce0d9d7e9bebd8f23d499bad874 Mon Sep 17 00:00:00 2001 From: PJ Date: Tue, 10 Dec 2024 15:14:23 +0100 Subject: [PATCH 11/14] worker: set host manager --- worker/worker.go | 1 + 1 file changed, 1 insertion(+) diff --git a/worker/worker.go b/worker/worker.go index 29e8d569e..aca411d76 100644 --- a/worker/worker.go +++ b/worker/worker.go @@ -628,6 +628,7 @@ func New(cfg config.Worker, masterKey [32]byte, b Bus, l *zap.Logger) (*Worker, w.contractSpendingRecorder = contracts.NewSpendingRecorder(w.shutdownCtx, w.bus, cfg.BusFlushInterval, l) hm := hosts.NewManager(w.masterKey, w.accounts, 
w.contractSpendingRecorder, dialer, l) + w.hostManager = hm dlmm := memory.NewManager(cfg.UploadMaxMemory, l.Named("uploadmanager")) w.downloadManager = download.NewManager(w.shutdownCtx, &uploadKey, hm, dlmm, w.bus, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, l) From 33f7996d9d9d0c0f891f077be9ddf880f469ea82 Mon Sep 17 00:00:00 2001 From: PJ Date: Tue, 10 Dec 2024 15:22:12 +0100 Subject: [PATCH 12/14] migrator: cleanup --- autopilot/autopilot.go | 1 + autopilot/migrator/accounts.go | 50 ++++++++++++---------------------- 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/autopilot/autopilot.go b/autopilot/autopilot.go index 5af46822c..12d0b419d 100644 --- a/autopilot/autopilot.go +++ b/autopilot/autopilot.go @@ -276,6 +276,7 @@ func (ap *Autopilot) Shutdown(ctx context.Context) error { ap.shutdownCtxCancel() close(ap.triggerChan) ap.wg.Wait() + ap.m.Stop() ap.s.Shutdown(ctx) ap.startTime = time.Time{} } diff --git a/autopilot/migrator/accounts.go b/autopilot/migrator/accounts.go index 80ee08339..aed89a0ce 100644 --- a/autopilot/migrator/accounts.go +++ b/autopilot/migrator/accounts.go @@ -64,35 +64,8 @@ func (m *migrator) SyncAccount(ctx context.Context, fcid types.FileContractID, h } ctx = gouging.WithChecker(ctx, m.bus, gp) - // sync the account - h := m.hostManager.Host(host.PublicKey, fcid, host.SiamuxAddr) - err = m.withRevision(ctx, fcid, host.PublicKey, host.SiamuxAddr, defaultRevisionFetchTimeout, lockingPrioritySyncing, func(rev types.FileContractRevision) error { - return h.SyncAccount(ctx, &rev) - }) - if err != nil { - return fmt.Errorf("failed to sync account; %w", err) - } - return nil -} - -func (m *migrator) withRevision(ctx context.Context, fcid types.FileContractID, hk types.PublicKey, siamuxAddr string, fetchTimeout time.Duration, lockPriority int, fn func(rev types.FileContractRevision) error) error { - return m.withContractLock(ctx, fcid, lockPriority, func() error { - if fetchTimeout > 0 { - var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, fetchTimeout) - defer cancel() - } - - rev, err := m.hostManager.Host(hk, fcid, siamuxAddr).FetchRevision(ctx, fcid) - if err != nil { - return err - } - return fn(rev) - }) -} - -func (m *migrator) withContractLock(ctx context.Context, fcid types.FileContractID, priority int, fn func() error) error { - contractLock, err := m.acquireContractLock(ctx, fcid, priority) + // acquire lock + contractLock, err := locking.NewContractLock(ctx, fcid, lockingPrioritySyncing, m.bus, m.logger) if err != nil { return err } @@ -102,9 +75,20 @@ func (m *migrator) withContractLock(ctx context.Context, fcid types.FileContract cancel() }() - return fn() -} + h := m.hostManager.Host(host.PublicKey, fcid, host.SiamuxAddr) -func (m *migrator) acquireContractLock(ctx context.Context, fcid types.FileContractID, priority int) (_ *locking.ContractLock, err error) { - return locking.NewContractLock(ctx, fcid, priority, m.bus, m.logger) + // fetch revision + ctx, cancel := context.WithTimeout(ctx, defaultRevisionFetchTimeout) + defer cancel() + rev, err := h.FetchRevision(ctx, fcid) + if err != nil { + return err + } + + // sync the account + err = h.SyncAccount(ctx, &rev) + if err != nil { + return fmt.Errorf("failed to sync account; %w", err) + } + return nil } From cde0fb1c1fa888ab78cdd0bf86c3f2100e419f59 Mon Sep 17 00:00:00 2001 From: PJ Date: Tue, 10 Dec 2024 15:41:02 +0100 Subject: [PATCH 13/14] host: remove FundAccount --- internal/host/host.go | 1 - internal/hosts/manager.go | 52 
--------------------------------------- 2 files changed, 53 deletions(-) diff --git a/internal/host/host.go b/internal/host/host.go index 3e573ebbb..51536f3ce 100644 --- a/internal/host/host.go +++ b/internal/host/host.go @@ -26,7 +26,6 @@ type ( PriceTable(ctx context.Context, rev *types.FileContractRevision) (api.HostPriceTable, types.Currency, error) FetchRevision(ctx context.Context, fcid types.FileContractID) (types.FileContractRevision, error) - FundAccount(ctx context.Context, balance types.Currency, rev *types.FileContractRevision) error SyncAccount(ctx context.Context, rev *types.FileContractRevision) error } ) diff --git a/internal/hosts/manager.go b/internal/hosts/manager.go index 67d090061..17703e4f0 100644 --- a/internal/hosts/manager.go +++ b/internal/hosts/manager.go @@ -218,58 +218,6 @@ func (h *hostClient) FetchRevision(ctx context.Context, fcid types.FileContractI return h.rhp3.Revision(ctx, fcid, h.hk, h.siamuxAddr) } -func (h *hostClient) FundAccount(ctx context.Context, desired types.Currency, rev *types.FileContractRevision) error { - log := h.logger.With( - zap.Stringer("host", h.hk), - zap.Stringer("account", h.acc.ID()), - ) - - // ensure we have at least 2H in the contract to cover the costs - if types.NewCurrency64(2).Cmp(rev.ValidRenterPayout()) >= 0 { - return fmt.Errorf("insufficient funds to fund account: %v <= %v", rev.ValidRenterPayout(), types.NewCurrency64(2)) - } - - // calculate the deposit amount - return h.acc.WithDeposit(func(balance types.Currency) (types.Currency, error) { - // return early if we have the desired balance - if balance.Cmp(desired) >= 0 { - return types.ZeroCurrency, nil - } - deposit := desired.Sub(balance) - - // fetch pricetable directly to bypass the gouging check - pt, _, err := h.pts.Fetch(ctx, h, rev) - if err != nil { - return types.ZeroCurrency, err - } - - // cap the deposit by what's left in the contract - cost := types.NewCurrency64(1) - availableFunds := rev.ValidRenterPayout().Sub(cost) - if deposit.Cmp(availableFunds) > 0 { - deposit = availableFunds - } - - // fund the account - if err := h.rhp3.FundAccount(ctx, rev, h.hk, h.siamuxAddr, deposit, h.acc.ID(), pt.HostPriceTable, h.renterKey); err != nil { - if rhp3.IsBalanceMaxExceeded(err) { - h.acc.ScheduleSync() - } - return types.ZeroCurrency, fmt.Errorf("failed to fund account with %v; %w", deposit, err) - } - - // record the spend - h.csr.RecordV1(*rev, api.ContractSpending{FundAccount: deposit.Add(cost)}) - - // log the account balance after funding - log.Debugw("fund account succeeded", - "balance", balance.ExactString(), - "deposit", deposit.ExactString(), - ) - return deposit, nil - }) -} - func (h *hostClient) SyncAccount(ctx context.Context, rev *types.FileContractRevision) error { // fetch pricetable directly to bypass the gouging check pt, _, err := h.pts.Fetch(ctx, h, rev) From 8da3f8db18eaee68f599271211d37d276ec63b8c Mon Sep 17 00:00:00 2001 From: PJ Date: Wed, 11 Dec 2024 16:03:33 +0100 Subject: [PATCH 14/14] all: implement review remarks --- README.md | 14 ++++----- autopilot/autopilot.go | 27 +++++++++++------- autopilot/contractor/contractor.go | 4 +-- autopilot/migrator/migrator.go | 39 ++++++++++--------------- cmd/renterd/config.go | 46 ++++++++++++++---------------- cmd/renterd/node.go | 2 +- config/config.go | 35 ++++++++++------------- internal/accounts/accounts.go | 3 ++ internal/test/e2e/cluster.go | 29 +++++++++---------- 9 files changed, 95 insertions(+), 104 deletions(-) diff --git a/README.md b/README.md index 09f783319..d62c22086 100644 
--- a/README.md +++ b/README.md @@ -80,18 +80,18 @@ overview of all settings configurable through the CLI. | `Worker.Enabled` | Enables/disables worker | `true` | `--worker.enabled` | `RENTERD_WORKER_ENABLED` | `worker.enabled` | | `Worker.AllowUnauthenticatedDownloads` | Allows unauthenticated downloads | - | `--worker.unauthenticatedDownloads` | `RENTERD_WORKER_UNAUTHENTICATED_DOWNLOADS` | `worker.allowUnauthenticatedDownloads` | | `Autopilot.Enabled` | Enables/disables autopilot | `true` | `--autopilot.enabled` | `RENTERD_AUTOPILOT_ENABLED` | `autopilot.enabled` | -| `Autopilot.AccountsRefillInterval` | Interval for refilling workers' account balances | `24h` | `--autopilot.accountRefillInterval` | - | `autopilot.accountsRefillInterval` | | `Autopilot.Heartbeat` | Interval for autopilot loop execution | `30m` | `--autopilot.heartbeat` | - | `autopilot.heartbeat` | +| `Autopilot.MigratorRefillInterval` | Interval for refilling account balances | `24h` | `--autopilot.migratorAccountRefillInterval` | - | `autopilot.migratorAccountsRefillInterval` | +| `Autopilot.MigratorHealthCutoff` | Threshold for migrating slabs based on health | `0.75` | `--autopilot.migratorHealthCutoff` | - | `autopilot.migratorHealthCutoff` | +| `Autopilot.MigratorNumThreads` | Number of threads migrating slabs | `1` | `--autopilot.migratorNumThreads` | - | `autopilot.migratorNumThreads` | +| `Autopilot.MigratorDownloadMaxOverdrive` | Max overdrive workers for migration downloads | `5` | `--autopilot.migratorDownloadMaxOverdrive` | - | `autopilot.migratorDownloadMaxOverdrive` | +| `Autopilot.MigratorDownloadOverdriveTimeout` | Timeout for overdriving migration downloads | `3s` | `--autopilot.migratorDownloadOverdriveTimeout` | - | `autopilot.migratorDownloadOverdriveTimeout` | +| `Autopilot.MigratorUploadMaxOverdrive` | Max overdrive workers for migration uploads | `5` | `--autopilot.migratorUploadMaxOverdrive` | - | `autopilot.migratorUploadMaxOverdrive` | +| `Autopilot.MigratorUploadOverdriveTimeout` | Timeout for overdriving migration uploads | `3s` | `--autopilot.migratorUploadOverdriveTimeout` | - | `autopilot.migratorUploadOverdriveTimeout` | | `Autopilot.RevisionBroadcastInterval`| Interval for broadcasting contract revisions | `168h` (7 days) | `--autopilot.revisionBroadcastInterval` | `RENTERD_AUTOPILOT_REVISION_BROADCAST_INTERVAL` | `autopilot.revisionBroadcastInterval` | | `Autopilot.ScannerBatchSize` | Batch size for host scanning | `1000` | `--autopilot.scannerBatchSize` | - | `autopilot.scannerBatchSize` | | `Autopilot.ScannerInterval` | Interval for scanning hosts | `24h` | `--autopilot.scannerInterval` | - | `autopilot.scannerInterval` | | `Autopilot.ScannerNumThreads` | Number of threads for scanning hosts | `100` | - | - | `autopilot.scannerNumThreads` | -| `Migrator.HealthCutoff` | Threshold for migrating slabs based on health | `0.75` | `--migrator.healthCutoff` | - | `migrator.HealthCutoff` | -| `Migrator.ParallelSlabsPerWorker` | Parallel slab migrations | `1` | `--migrator.parallelSlabsPerWorker` | `RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER` | `migrator.parallelSlabsPerWorker` | -| `Migrator.DownloadMaxOverdrive` | Max overdrive workers for migration downloads | `5` | `--migrator.downloadMaxOverdrive` | - | `migrator.downloadMaxOverdrive` | -| `Migrator.DownloadOverdriveTimeout` | Timeout for overdriving migration downloads | `3s` | `--migrator.downloadOverdriveTimeout` | - | `migrator.downloadOverdriveTimeout` | -| `Migrator.UploadMaxOverdrive` | Max overdrive workers for migration uploads | 
`5` | `--migrator.uploadMaxOverdrive` | - | `migrator.uploadMaxOverdrive` | -| `Migrator.UploadOverdriveTimeout` | Timeout for overdriving migration uploads | `3s` | `--migrator.uploadOverdriveTimeout` | - | `migrator.uploadOverdriveTimeout` | | `S3.Address` | Address for serving S3 API | `:9982` | `--s3.address` | `RENTERD_S3_ADDRESS` | `s3.address` | | `S3.DisableAuth` | Disables authentication for S3 API | `false` | `--s3.disableAuth` | `RENTERD_S3_DISABLE_AUTH` | `s3.disableAuth` | | `S3.Enabled` | Enables/disables S3 API | `true` | `--s3.enabled` | `RENTERD_S3_ENABLED` | `s3.enabled` | diff --git a/autopilot/autopilot.go b/autopilot/autopilot.go index 12d0b419d..dc9a2ba11 100644 --- a/autopilot/autopilot.go +++ b/autopilot/autopilot.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "net" "net/http" "runtime" "sync" @@ -22,6 +23,7 @@ import ( "go.sia.tech/renterd/autopilot/scanner" "go.sia.tech/renterd/build" "go.sia.tech/renterd/config" + "go.sia.tech/renterd/internal/rhp" "go.sia.tech/renterd/internal/utils" "go.sia.tech/renterd/object" "go.sia.tech/renterd/webhooks" @@ -146,31 +148,36 @@ type Autopilot struct { } // New initializes an Autopilot. -func New(cfg config.Autopilot, mCfg config.Migrator, masterKey utils.MasterKey, bus Bus, logger *zap.Logger) (_ *Autopilot, err error) { +func New(cfg config.Autopilot, masterKey utils.MasterKey, bus Bus, logger *zap.Logger) (_ *Autopilot, err error) { logger = logger.Named("autopilot") - shutdownCtx, shutdownCtxCancel := context.WithCancel(context.Background()) + + ctx, cancel := context.WithCancel(context.Background()) ap := &Autopilot{ alerts: alerts.WithOrigin(bus, "autopilot"), bus: bus, logger: logger.Sugar(), - shutdownCtx: shutdownCtx, - shutdownCtxCancel: shutdownCtxCancel, + shutdownCtx: ctx, + shutdownCtxCancel: cancel, tickerDuration: cfg.Heartbeat, pruningAlertIDs: make(map[types.FileContractID]types.Hash256), } - ap.s, err = scanner.New(ap.bus, cfg.ScannerBatchSize, cfg.ScannerNumThreads, cfg.ScannerInterval, logger) + // create scanner + ap.s, err = scanner.New(bus, cfg.ScannerBatchSize, cfg.ScannerNumThreads, cfg.ScannerInterval, logger) if err != nil { return } - ap.c = contractor.New(bus, bus, cfg.RevisionSubmissionBuffer, cfg.RevisionBroadcastInterval, cfg.AllowRedundantHostIPs, ap.logger) - ap.m, err = migrator.New(ap.shutdownCtx, mCfg, masterKey, ap.alerts, ap.bus, ap.bus, logger) + // create contractor + ap.c = contractor.New(bus, bus, cfg.RevisionSubmissionBuffer, cfg.RevisionBroadcastInterval, cfg.AllowRedundantHostIPs, logger) + + // create migrator + ap.m, err = migrator.New(ctx, masterKey, ap.alerts, bus, bus, cfg.MigratorHealthCutoff, cfg.MigratorNumThreads, cfg.MigratorDownloadMaxOverdrive, cfg.MigratorUploadMaxOverdrive, cfg.MigratorDownloadOverdriveTimeout, cfg.MigratorUploadOverdriveTimeout, cfg.MigratorAccountsRefillInterval, logger) if err != nil { - return + return nil, err } return ap, nil @@ -253,7 +260,7 @@ func (ap *Autopilot) Run() { for !ap.isStopped() { ap.logger.Info("autopilot iteration starting") tickerFired := make(chan struct{}) - ap.tick(forceScan, tickerFired) + ap.performMaintenance(forceScan, tickerFired) select { case <-ap.shutdownCtx.Done(): return @@ -407,7 +414,7 @@ func (ap *Autopilot) blockUntilSynced(interrupt <-chan time.Time) (synced, block } } -func (ap *Autopilot) tick(forceScan bool, tickerFired chan struct{}) { +func (ap *Autopilot) performMaintenance(forceScan bool, tickerFired chan struct{}) { defer ap.logger.Info("autopilot iteration ended") // initiate a host scan - no 
need to be synced or configured for scanning diff --git a/autopilot/contractor/contractor.go b/autopilot/contractor/contractor.go index 3bd3dab88..2ca8c607b 100644 --- a/autopilot/contractor/contractor.go +++ b/autopilot/contractor/contractor.go @@ -125,13 +125,13 @@ type ( } ) -func New(bus Bus, alerter alerts.Alerter, revisionSubmissionBuffer uint64, revisionBroadcastInterval time.Duration, allowRedundantHostIPs bool, logger *zap.SugaredLogger) *Contractor { +func New(bus Bus, alerter alerts.Alerter, revisionSubmissionBuffer uint64, revisionBroadcastInterval time.Duration, allowRedundantHostIPs bool, logger *zap.Logger) *Contractor { logger = logger.Named("contractor") return &Contractor{ bus: bus, alerter: alerter, churn: make(accumulatedChurn), - logger: logger, + logger: logger.Sugar(), allowRedundantHostIPs: allowRedundantHostIPs, diff --git a/autopilot/migrator/migrator.go b/autopilot/migrator/migrator.go index 296c3801a..e4069134c 100644 --- a/autopilot/migrator/migrator.go +++ b/autopilot/migrator/migrator.go @@ -2,7 +2,6 @@ package migrator import ( "context" - "fmt" "math" "net" "sort" @@ -13,7 +12,6 @@ import ( "go.sia.tech/core/types" "go.sia.tech/renterd/alerts" "go.sia.tech/renterd/api" - "go.sia.tech/renterd/config" "go.sia.tech/renterd/internal/accounts" "go.sia.tech/renterd/internal/contracts" "go.sia.tech/renterd/internal/download" @@ -87,10 +85,8 @@ type ( bus Bus ss SlabStore - healthCutoff float64 - parallelSlabsPerWorker uint64 - - masterKey utils.MasterKey + healthCutoff float64 + numThreads uint64 accounts *accounts.Manager downloadManager *download.Manager @@ -115,22 +111,15 @@ type ( } ) -func New(ctx context.Context, cfg config.Migrator, masterKey utils.MasterKey, alerts alerts.Alerter, ss SlabStore, b Bus, logger *zap.Logger) (*migrator, error) { +func New(ctx context.Context, masterKey utils.MasterKey, alerts alerts.Alerter, ss SlabStore, b Bus, healthCutoff float64, numThreads, downloadMaxOverdrive, uploadMaxOverdrive uint64, downloadOverdriveTimeout, uploadOverdriveTimeout, accountsRefillInterval time.Duration, logger *zap.Logger) (*migrator, error) { logger = logger.Named("migrator") - - if cfg.AccountsRefillInterval == 0 { - return nil, fmt.Errorf("accounts refill interval must be set") - } - m := &migrator{ alerts: alerts, bus: b, ss: ss, - healthCutoff: cfg.HealthCutoff, - parallelSlabsPerWorker: cfg.ParallelSlabsPerWorker, - - masterKey: masterKey, + healthCutoff: healthCutoff, + numThreads: numThreads, signalConsensusNotSynced: make(chan struct{}, 1), signalMaintenanceFinished: make(chan struct{}, 1), @@ -142,8 +131,12 @@ func New(ctx context.Context, cfg config.Migrator, masterKey utils.MasterKey, al logger: logger.Sugar(), } + // derive keys + ak := masterKey.DeriveAccountsKey("migrator") + uk := masterKey.DeriveUploadKey() + // create account manager - am, err := accounts.NewManager(masterKey.DeriveAccountsKey("migrator"), "migrator", alerts, m, m, b, b, b, b, cfg.AccountsRefillInterval, logger) + am, err := accounts.NewManager(ak, "migrator", alerts, m, m, b, b, b, b, accountsRefillInterval, logger) if err != nil { return nil, err } @@ -152,15 +145,13 @@ func New(ctx context.Context, cfg config.Migrator, masterKey utils.MasterKey, al // create host manager dialer := rhp.NewFallbackDialer(b, net.Dialer{}, logger) csr := contracts.NewSpendingRecorder(ctx, b, 5*time.Second, logger) - hm := hosts.NewManager(masterKey, am, csr, dialer, logger) - m.hostManager = hm + m.hostManager = hosts.NewManager(masterKey, am, csr, dialer, logger) m.rhp4Client = 
rhp4.New(dialer) // create upload & download manager mm := memory.NewManager(math.MaxInt64, logger) - uk := masterKey.DeriveUploadKey() - m.downloadManager = download.NewManager(ctx, &uk, hm, mm, b, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, logger) - m.uploadManager = upload.NewManager(ctx, &uk, hm, mm, b, b, b, cfg.UploadMaxOverdrive, cfg.UploadOverdriveTimeout, logger) + m.downloadManager = download.NewManager(ctx, &uk, m.hostManager, mm, b, downloadMaxOverdrive, downloadOverdriveTimeout, logger) + m.uploadManager = upload.NewManager(ctx, &uk, m.hostManager, mm, b, b, b, uploadMaxOverdrive, uploadOverdriveTimeout, logger) return m, nil } @@ -212,7 +203,7 @@ func (m *migrator) slabMigrationEstimate(remaining int) time.Duration { return 0 } - totalNumMS := float64(remaining) * p90 / float64(m.parallelSlabsPerWorker) + totalNumMS := float64(remaining) * p90 / float64(m.numThreads) return time.Duration(totalNumMS) * time.Millisecond } @@ -228,7 +219,7 @@ func (m *migrator) performMigrations(ctx context.Context) { }() // launch workers - for i := uint64(0); i < m.parallelSlabsPerWorker; i++ { + for i := uint64(0); i < m.numThreads; i++ { wg.Add(1) go func() { defer wg.Done() diff --git a/cmd/renterd/config.go b/cmd/renterd/config.go index 2b4774fab..464a8521b 100644 --- a/cmd/renterd/config.go +++ b/cmd/renterd/config.go @@ -107,21 +107,22 @@ func defaultConfig() config.Config { Autopilot: config.Autopilot{ Enabled: true, - RevisionSubmissionBuffer: 150, // 144 + 6 blocks leeway - Heartbeat: 30 * time.Minute, + Heartbeat: 30 * time.Minute, + + MigratorAccountsRefillInterval: defaultAccountRefillInterval, + MigratorHealthCutoff: 0.75, + MigratorNumThreads: 1, + MigratorDownloadMaxOverdrive: 5, + MigratorDownloadOverdriveTimeout: 3 * time.Second, + MigratorUploadMaxOverdrive: 5, + MigratorUploadOverdriveTimeout: 3 * time.Second, + RevisionBroadcastInterval: 7 * 24 * time.Hour, - ScannerBatchSize: 100, - ScannerInterval: 4 * time.Hour, - ScannerNumThreads: 10, - }, - Migrator: config.Migrator{ - AccountsRefillInterval: defaultAccountRefillInterval, - HealthCutoff: 0.75, - ParallelSlabsPerWorker: 1, - DownloadMaxOverdrive: 5, - DownloadOverdriveTimeout: 3 * time.Second, - UploadMaxOverdrive: 5, - UploadOverdriveTimeout: 3 * time.Second, + RevisionSubmissionBuffer: 150, // 144 + 6 blocks leeway + + ScannerBatchSize: 100, + ScannerInterval: 4 * time.Hour, + ScannerNumThreads: 10, }, S3: config.S3{ Address: "localhost:8080", @@ -301,14 +302,13 @@ func parseCLIFlags(cfg *config.Config) { flag.BoolVar(&cfg.Autopilot.Enabled, "autopilot.enabled", cfg.Autopilot.Enabled, "Enables/disables autopilot (overrides with RENTERD_AUTOPILOT_ENABLED)") flag.DurationVar(&cfg.ShutdownTimeout, "node.shutdownTimeout", cfg.ShutdownTimeout, "Timeout for node shutdown") - // migrator - flag.DurationVar(&cfg.Migrator.AccountsRefillInterval, "migrator.accountRefillInterval", cfg.Migrator.AccountsRefillInterval, "Interval for refilling migrator' account balances") - flag.Float64Var(&cfg.Migrator.HealthCutoff, "migrator.healthCutoff", cfg.Migrator.HealthCutoff, "Threshold for migrating slabs based on health") - flag.Uint64Var(&cfg.Migrator.ParallelSlabsPerWorker, "migrator.parallelSlabsPerWorker", cfg.Migrator.ParallelSlabsPerWorker, "Parallel slab migrations per worker (overrides with RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER)") - flag.Uint64Var(&cfg.Migrator.DownloadMaxOverdrive, "migrator.downloadMaxOverdrive", cfg.Migrator.DownloadMaxOverdrive, "Max overdrive workers for migration downloads") - 
flag.DurationVar(&cfg.Migrator.DownloadOverdriveTimeout, "migrator.downloadOverdriveTimeout", cfg.Migrator.DownloadOverdriveTimeout, "Timeout for overdriving migration downloads") - flag.Uint64Var(&cfg.Migrator.UploadMaxOverdrive, "migrator.uploadMaxOverdrive", cfg.Migrator.UploadMaxOverdrive, "Max overdrive workers for migration uploads") - flag.DurationVar(&cfg.Migrator.UploadOverdriveTimeout, "migrator.uploadOverdriveTimeout", cfg.Migrator.UploadOverdriveTimeout, "Timeout for overdriving migration uploads") + flag.DurationVar(&cfg.Autopilot.MigratorAccountsRefillInterval, "autopilot.migratorAccountRefillInterval", cfg.Autopilot.MigratorAccountsRefillInterval, "Interval for refilling migrator' account balances") + flag.Float64Var(&cfg.Autopilot.MigratorHealthCutoff, "autopilot.migratorHealthCutoff", cfg.Autopilot.MigratorHealthCutoff, "Threshold for migrating slabs based on health") + flag.Uint64Var(&cfg.Autopilot.MigratorNumThreads, "autopilot.migratorNumThreads", cfg.Autopilot.MigratorNumThreads, "Parallel slab migrations per worker (overrides with RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER)") + flag.Uint64Var(&cfg.Autopilot.MigratorDownloadMaxOverdrive, "autopilot.migratorDownloadMaxOverdrive", cfg.Autopilot.MigratorDownloadMaxOverdrive, "Max overdrive workers for migration downloads") + flag.DurationVar(&cfg.Autopilot.MigratorDownloadOverdriveTimeout, "autopilot.migratorDownloadOverdriveTimeout", cfg.Autopilot.MigratorDownloadOverdriveTimeout, "Timeout for overdriving migration downloads") + flag.Uint64Var(&cfg.Autopilot.MigratorUploadMaxOverdrive, "autopilot.migratorUploadMaxOverdrive", cfg.Autopilot.MigratorUploadMaxOverdrive, "Max overdrive workers for migration uploads") + flag.DurationVar(&cfg.Autopilot.MigratorUploadOverdriveTimeout, "autopilot.migratorUploadOverdriveTimeout", cfg.Autopilot.MigratorUploadOverdriveTimeout, "Timeout for overdriving migration uploads") // s3 flag.StringVar(&cfg.S3.Address, "s3.address", cfg.S3.Address, "Address for serving S3 API (overrides with RENTERD_S3_ADDRESS)") @@ -365,8 +365,6 @@ func parseEnvironmentVariables(cfg *config.Config) { parseEnvVar("RENTERD_AUTOPILOT_ENABLED", &cfg.Autopilot.Enabled) parseEnvVar("RENTERD_AUTOPILOT_REVISION_BROADCAST_INTERVAL", &cfg.Autopilot.RevisionBroadcastInterval) - parseEnvVar("RENTERD_MIGRATOR_PARALLEL_SLABS_PER_WORKER", &cfg.Migrator.ParallelSlabsPerWorker) - parseEnvVar("RENTERD_S3_ADDRESS", &cfg.S3.Address) parseEnvVar("RENTERD_S3_ENABLED", &cfg.S3.Enabled) parseEnvVar("RENTERD_S3_DISABLE_AUTH", &cfg.S3.DisableAuth) diff --git a/cmd/renterd/node.go b/cmd/renterd/node.go index a04072d6a..a250ae651 100644 --- a/cmd/renterd/node.go +++ b/cmd/renterd/node.go @@ -195,7 +195,7 @@ func newNode(cfg config.Config, network *consensus.Network, genesis types.Block) // initialise autopilot if cfg.Autopilot.Enabled { workerKey := blake2b.Sum256(append([]byte("worker"), pk...)) - ap, err := autopilot.New(cfg.Autopilot, cfg.Migrator, workerKey, bc, logger) + ap, err := autopilot.New(cfg.Autopilot, workerKey, bc, logger) if err != nil { logger.Fatal("failed to create autopilot: " + err.Error()) } diff --git a/config/config.go b/config/config.go index 5b4fd0147..86ef1d97f 100644 --- a/config/config.go +++ b/config/config.go @@ -24,7 +24,6 @@ type ( Worker Worker `yaml:"worker,omitempty"` S3 S3 `yaml:"s3,omitempty"` - Migrator Migrator `yaml:"migrator,omitempty"` Database Database `yaml:"database,omitempty"` Explorer ExplorerData `yaml:"explorer,omitempty"` } @@ -129,25 +128,21 @@ type ( // Autopilot contains the 
configuration for an autopilot. Autopilot struct { - Enabled bool `yaml:"enabled,omitempty"` - AllowRedundantHostIPs bool `yaml:"allowRedundantHostIPs,omitempty"` - Heartbeat time.Duration `yaml:"heartbeat,omitempty"` - RevisionBroadcastInterval time.Duration `yaml:"revisionBroadcastInterval,omitempty"` - RevisionSubmissionBuffer uint64 `yaml:"revisionSubmissionBuffer,omitempty"` - ScannerInterval time.Duration `yaml:"scannerInterval,omitempty"` - ScannerBatchSize uint64 `yaml:"scannerBatchSize,omitempty"` - ScannerNumThreads uint64 `yaml:"scannerNumThreads,omitempty"` - } - - // Migrator contains the configuration for a migrator. - Migrator struct { - AccountsRefillInterval time.Duration `yaml:"accountsRefillInterval,omitempty"` - DownloadMaxOverdrive uint64 `yaml:"downloadMaxOverdrive,omitempty"` - DownloadOverdriveTimeout time.Duration `yaml:"downloadOverdriveTimeout,omitempty"` - HealthCutoff float64 `yaml:"healthCutoff,omitempty"` - ParallelSlabsPerWorker uint64 `yaml:"parallelSlabsPerWorker,omitempty"` - UploadMaxOverdrive uint64 `yaml:"uploadMaxOverdrive,omitempty"` - UploadOverdriveTimeout time.Duration `yaml:"uploadOverdriveTimeout,omitempty"` + Enabled bool `yaml:"enabled,omitempty"` + AllowRedundantHostIPs bool `yaml:"allowRedundantHostIPs,omitempty"` + Heartbeat time.Duration `yaml:"heartbeat,omitempty"` + MigratorAccountsRefillInterval time.Duration `yaml:"migratorAccountsRefillInterval,omitempty"` + MigratorDownloadMaxOverdrive uint64 `yaml:"migratorDownloadMaxOverdrive,omitempty"` + MigratorDownloadOverdriveTimeout time.Duration `yaml:"migratorDownloadOverdriveTimeout,omitempty"` + MigratorHealthCutoff float64 `yaml:"migratorHealthCutoff,omitempty"` + MigratorNumThreads uint64 `yaml:"migratorNumThreads,omitempty"` + MigratorUploadMaxOverdrive uint64 `yaml:"migratorUploadMaxOverdrive,omitempty"` + MigratorUploadOverdriveTimeout time.Duration `yaml:"migratorUploadOverdriveTimeout,omitempty"` + RevisionBroadcastInterval time.Duration `yaml:"revisionBroadcastInterval,omitempty"` + RevisionSubmissionBuffer uint64 `yaml:"revisionSubmissionBuffer,omitempty"` + ScannerInterval time.Duration `yaml:"scannerInterval,omitempty"` + ScannerBatchSize uint64 `yaml:"scannerBatchSize,omitempty"` + ScannerNumThreads uint64 `yaml:"scannerNumThreads,omitempty"` } ) diff --git a/internal/accounts/accounts.go b/internal/accounts/accounts.go index e1a7b33b1..2a67b7002 100644 --- a/internal/accounts/accounts.go +++ b/internal/accounts/accounts.go @@ -109,6 +109,9 @@ type ( // save all accounts. 
func NewManager(key utils.AccountsKey, owner string, alerter alerts.Alerter, funder Funder, syncer Syncer, css ConsensusStateStore, cs ContractStore, hs HostStore, s Store, refillInterval time.Duration, l *zap.Logger) (*Manager, error) { logger := l.Named("accounts").Sugar() + if refillInterval == 0 { + return nil, errors.New("refill interval must be set") + } shutdownCtx, shutdownCancel := context.WithCancel(context.Background()) a := &Manager{ diff --git a/internal/test/e2e/cluster.go b/internal/test/e2e/cluster.go index 7fef4d7fa..f39846d65 100644 --- a/internal/test/e2e/cluster.go +++ b/internal/test/e2e/cluster.go @@ -237,7 +237,7 @@ func newTestCluster(t *testing.T, opts testClusterOptions) *TestCluster { wk = *opts.walletKey } - busCfg, workerCfg, apCfg, mCfg, dbCfg := testBusCfg(), testWorkerCfg(), testApCfg(), testMigratorCfg(), testDBCfg() + busCfg, workerCfg, apCfg, dbCfg := testBusCfg(), testWorkerCfg(), testApCfg(), testDBCfg() if opts.busCfg != nil { busCfg = *opts.busCfg } @@ -373,7 +373,7 @@ func newTestCluster(t *testing.T, opts testClusterOptions) *TestCluster { s3ShutdownFns = append(s3ShutdownFns, s3Server.Shutdown) // Create autopilot. - ap, err := autopilot.New(apCfg, mCfg, workerKey, busClient, logger) + ap, err := autopilot.New(apCfg, workerKey, busClient, logger) tt.OK(err) autopilotAuth := jape.BasicAuth(autopilotPassword) @@ -1006,21 +1006,18 @@ func testApCfg() config.Autopilot { AllowRedundantHostIPs: true, Heartbeat: time.Second, RevisionSubmissionBuffer: 0, - ScannerInterval: 10 * time.Millisecond, - ScannerBatchSize: 10, - ScannerNumThreads: 1, - } -} -func testMigratorCfg() config.Migrator { - return config.Migrator{ - AccountsRefillInterval: 10 * time.Millisecond, - HealthCutoff: 0.99, - ParallelSlabsPerWorker: 1, - DownloadMaxOverdrive: 5, - DownloadOverdriveTimeout: 500 * time.Millisecond, - UploadOverdriveTimeout: 500 * time.Millisecond, - UploadMaxOverdrive: 5, + MigratorAccountsRefillInterval: 10 * time.Millisecond, + MigratorHealthCutoff: 0.99, + MigratorNumThreads: 1, + MigratorDownloadMaxOverdrive: 5, + MigratorDownloadOverdriveTimeout: 500 * time.Millisecond, + MigratorUploadOverdriveTimeout: 500 * time.Millisecond, + MigratorUploadMaxOverdrive: 5, + + ScannerInterval: 10 * time.Millisecond, + ScannerBatchSize: 10, + ScannerNumThreads: 1, } }
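Editor's note on the wiring introduced in patches 10-14: the duplicated spending-recorder and host-manager code is consolidated into internal/contracts and internal/hosts, and both the worker and the migrator now construct them the same way before handing the resulting hosts.Manager to the download and upload managers. A minimal sketch of that shared construction path, mirroring the constructor bodies in autopilot/migrator/migrator.go and worker/worker.go above — here `b`, `am`, `masterKey`, `logger`, `maxOverdrive` and `overdriveTimeout` stand for the caller's bus client, accounts manager, master key, logger and configured overdrive settings, so this is an illustration rather than code that compiles on its own:

    // shared dialer, reused by the RHP clients created inside the host manager
    dialer := rhp.NewFallbackDialer(b, net.Dialer{}, logger)
    // records contract spending and periodically flushes it to the bus
    csr := contracts.NewSpendingRecorder(ctx, b, 5*time.Second, logger)
    // hands out Host, Downloader and Uploader clients on demand
    hm := hosts.NewManager(masterKey, am, csr, dialer, logger)

    // both data-path managers are built on top of the same hosts.Manager;
    // the migrator passes math.MaxInt64 as its memory limit, the worker its
    // configured maximum
    mm := memory.NewManager(math.MaxInt64, logger)
    uk := masterKey.DeriveUploadKey()
    downloadMgr := download.NewManager(ctx, &uk, hm, mm, b, maxOverdrive, overdriveTimeout, logger)
    // the bus is passed three times: as ObjectStore, ContractLocker and ContractStore
    uploadMgr := upload.NewManager(ctx, &uk, hm, mm, b, b, b, maxOverdrive, overdriveTimeout, logger)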
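For operators, the final patch also removes the top-level `migrator:` section from the YAML configuration; its settings now live under `autopilot:` with `migrator`-prefixed keys (see the config/config.go struct tags and the README table above). A sketch of the resulting section, using the defaults from cmd/renterd/config.go — the duration formatting and the surrounding file layout are illustrative, not taken from the patch:

    autopilot:
      enabled: true
      heartbeat: 30m
      migratorAccountsRefillInterval: 24h
      migratorHealthCutoff: 0.75
      migratorNumThreads: 1
      migratorDownloadMaxOverdrive: 5
      migratorDownloadOverdriveTimeout: 3s
      migratorUploadMaxOverdrive: 5
      migratorUploadOverdriveTimeout: 3s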