Skip to content

Commit

Permalink
Fix early exit in compatibility check (#41)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jorres authored Nov 5, 2024
1 parent f8c2932 commit 50891ad
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .changes/unreleased/Fixed-20241105-192850.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
kind: Fixed
body: ydbops now properly continues the restart loop even if listing nodes during maintenance check fails with "retry exceeded" error
time: 2024-11-05T19:28:50.308019908+01:00
7 changes: 6 additions & 1 deletion pkg/rolling/rolling.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package rolling

import (
"bytes"
"errors"
"fmt"
"math"
"strings"
Expand Down Expand Up @@ -220,6 +221,7 @@ func (r *Rolling) cmsWaitingLoop(task cms.MaintenanceTask, totalNodes int) error
task, err = r.cms.RefreshMaintenanceTask(taskID)
if err != nil {
r.logger.Warnf("Failed to refresh maintenance task: %+v", err)
continue
}

// NOTE: compatibility check will not fire if rolling restart just
Expand All @@ -235,7 +237,10 @@ func (r *Rolling) cmsWaitingLoop(task cms.MaintenanceTask, totalNodes int) error
// issues once more. We better exit quickly.
if !r.opts.SuppressCompatibilityCheck {
incompatible := r.tryDetectCompatibilityIssues()
if incompatible != nil {

// If the error is a RetryExceededError, keep looping instead of exiting: the CMS
// request may have been sent to a node that has just been restarted, which is fine.
if incompatible != nil && !errors.Is(incompatible, &utils.RetryExceededError{}) {
return incompatible
}
}
Expand Down
23 changes: 22 additions & 1 deletion pkg/utils/retries.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,24 @@ import (
"google.golang.org/grpc/status"
)

// RetryExceededError is the error WrapWithRetries returns once it has used up
// all of its attempts. It carries a summary message and the error produced by
// the last attempt.
type RetryExceededError struct {
	msg string // human-readable summary, e.g. how many attempts were made
	err error  // error from the final attempt; may be nil
}

// Error returns the summary message followed by the last underlying error.
// When no underlying error was recorded, only the summary is returned, so
// calling Error never dereferences a nil error.
func (e *RetryExceededError) Error() string {
	if e.err == nil {
		return e.msg
	}
	return e.msg + ". Last error: " + e.err.Error()
}

// Unwrap exposes the error of the last attempt so that errors.Is and errors.As
// can inspect the underlying cause.
func (e *RetryExceededError) Unwrap() error {
	return e.err
}

// Is reports whether targetErr is a *RetryExceededError. The match is by type
// only, which lets callers write errors.Is(err, &RetryExceededError{}) without
// caring about the field values of either error.
func (e *RetryExceededError) Is(targetErr error) bool {
	_, ok := targetErr.(*RetryExceededError)
	return ok
}

// backoffTimeAfter returns the exponential backoff delay for the given attempt
// number: 2^attempt seconds, with the power truncated to a whole number of
// seconds.
func backoffTimeAfter(attempt int) time.Duration {
	seconds := int(math.Pow(2, float64(attempt)))
	return time.Duration(seconds) * time.Second
}
Expand Down Expand Up @@ -45,5 +63,8 @@ func WrapWithRetries(
}
}

return nil, fmt.Errorf("number of retries exceeded: %v. Last error: %w", maxAttempts, lastError)
return nil, &RetryExceededError{
msg: fmt.Sprintf("number of retries exceeded: %v", maxAttempts),
err: lastError,
}
}

0 comments on commit 50891ad

Please sign in to comment.