From c44143a260fdc17977a651bb722f131de78e3b85 Mon Sep 17 00:00:00 2001 From: Polina Bungina Date: Thu, 10 Oct 2024 21:40:15 +0200 Subject: [PATCH] Add major upgrade prechecks Don't fail major upgrade (don't set annotation) if replica(s) are not (yet) streaming or replication lag is too high --- go.mod | 1 + go.sum | 2 ++ pkg/cluster/majorversionupgrade.go | 41 +++++++++++++++++++++++++++--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index 69037040e..d6390f45f 100644 --- a/go.mod +++ b/go.mod @@ -22,6 +22,7 @@ require ( ) require ( + github.com/Masterminds/semver v1.5.0 github.com/davecgh/go-spew v1.1.1 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/evanphx/json-patch v4.12.0+incompatible // indirect diff --git a/go.sum b/go.sum index d90bfdb5b..c7992fea0 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= +github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/aws/aws-sdk-go v1.53.8 h1:eoqGb1WOHIrCFKo1d51cMcnt1ralfLFaEqRkC5Zzv8k= diff --git a/pkg/cluster/majorversionupgrade.go b/pkg/cluster/majorversionupgrade.go index 1c5a670eb..e8876dc49 100644 --- a/pkg/cluster/majorversionupgrade.go +++ b/pkg/cluster/majorversionupgrade.go @@ -6,6 +6,7 @@ import ( "fmt" "strings" + "github.com/Masterminds/semver" "github.com/zalando/postgres-operator/pkg/spec" "github.com/zalando/postgres-operator/pkg/util" v1 "k8s.io/api/core/v1" @@ -170,6 +171,38 @@ func (c *Cluster) majorVersionUpgrade() error { return nil } + members, err := c.patroni.GetClusterMembers(masterPod) + if err != nil { + c.logger.Error("could not get cluster members data from Patroni API, skipping major version upgrade") + return err + } + patroniData, err := c.patroni.GetMemberData(masterPod) + if err != nil { + c.logger.Error("could not get members data from Patroni API, skipping major version upgrade") + return err + } + patroniVer, err := semver.NewVersion(patroniData.Patroni.Version) + if err != nil { + c.logger.Error("error parsing Patroni version") + patroniVer, _ = semver.NewVersion("3.0.4") + } + verConstraint, _ := semver.NewConstraint(">= 3.0.4") + checkStreaming, _ := verConstraint.Validate(patroniVer) + + for _, member := range members { + if PostgresRole(member.Role) == Leader { + continue + } + if checkStreaming && member.State != "streaming" { + c.logger.Infof("skipping major version upgrade, replica %s is not streaming from primary", member.Name) + return nil + } + if member.Lag > 16*1024*1024 { + c.logger.Infof("skipping major version upgrade, replication lag on member %s is too high", member.Name) + return nil + } + } + isUpgradeSuccess := true numberOfPods := len(pods) if allRunning && masterPod != nil { @@ -187,19 +220,21 @@ func (c *Cluster) majorVersionUpgrade() error { } resultIdCheck = strings.TrimSuffix(resultIdCheck, "\n") - var result string + var result, scriptErrMsg string if resultIdCheck != "0" { c.logger.Infof("user id was identified as: %s, hence default user is non-root already", resultIdCheck) result, err = c.ExecCommand(podName, "/bin/bash", "-c", upgradeCommand) + scriptErrMsg, _ = c.ExecCommand(podName, "/bin/bash", "-c", "tail -n 1 last_upgrade.log") } else { c.logger.Infof("user id was identified as: %s, using su to reach the postgres user", resultIdCheck) result, err = c.ExecCommand(podName, "/bin/su", "postgres", "-c", upgradeCommand) + scriptErrMsg, _ = c.ExecCommand(podName, "/bin/bash", "-c", "tail -n 1 last_upgrade.log") } if err != nil { isUpgradeSuccess = false c.annotatePostgresResource(isUpgradeSuccess) - c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, err) - return err + c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "Major Version Upgrade", "upgrade from %d to %d FAILED: %v", c.currentMajorVersion, desiredVersion, scriptErrMsg) + return fmt.Errorf(scriptErrMsg) } c.annotatePostgresResource(isUpgradeSuccess)