From 7bef47868046c40245050a263a5f2ad7ef20e997 Mon Sep 17 00:00:00 2001 From: Ethan Mosbaugh Date: Wed, 20 Nov 2024 10:29:16 -0800 Subject: [PATCH 1/7] chore(ec): send upgrade events instead of the operator --- pkg/embeddedcluster/metrics.go | 101 ++++++++++++++++++++++++++++ pkg/embeddedcluster/util.go | 15 +++++ pkg/operator/operator.go | 72 ++++++++++++++++++-- pkg/upgradeservice/deploy/deploy.go | 58 ++++++++++++++++ 4 files changed, 240 insertions(+), 6 deletions(-) create mode 100644 pkg/embeddedcluster/metrics.go diff --git a/pkg/embeddedcluster/metrics.go b/pkg/embeddedcluster/metrics.go new file mode 100644 index 0000000000..695b86693a --- /dev/null +++ b/pkg/embeddedcluster/metrics.go @@ -0,0 +1,101 @@ +package embeddedcluster + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "time" + + embeddedclusterv1beta1 "github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1" +) + +// UpgradeStartedEvent is send back home when the upgrade starts. +type UpgradeStartedEvent struct { + ClusterID string `json:"clusterID"` + TargetVersion string `json:"targetVersion"` + InitialVersion string `json:"initialVersion"` +} + +// UpgradeFailedEvent is send back home when the upgrade fails. +type UpgradeFailedEvent struct { + ClusterID string `json:"clusterID"` + TargetVersion string `json:"targetVersion"` + InitialVersion string `json:"initialVersion"` + Reason string `json:"reason"` +} + +// UpgradeSucceededEvent event is send back home when the upgrade succeeds. +type UpgradeSucceededEvent struct { + ClusterID string `json:"clusterID"` + TargetVersion string `json:"targetVersion"` + InitialVersion string `json:"initialVersion"` +} + +// NotifyUpgradeStarted notifies the metrics server that an upgrade has started. +func NotifyUpgradeStarted(ctx context.Context, baseURL string, ins, prev *embeddedclusterv1beta1.Installation) error { + if ins.Spec.AirGap { + return nil + } + return sendEvent(ctx, "UpgradeStarted", baseURL, UpgradeStartedEvent{ + ClusterID: ins.Spec.ClusterID, + TargetVersion: ins.Spec.Config.Version, + InitialVersion: prev.Spec.Config.Version, + }) +} + +// NotifyUpgradeFailed notifies the metrics server that an upgrade has failed. +func NotifyUpgradeFailed(ctx context.Context, baseURL string, ins, prev *embeddedclusterv1beta1.Installation) error { + if ins.Spec.AirGap { + return nil + } + return sendEvent(ctx, "UpgradeFailed", baseURL, UpgradeFailedEvent{ + ClusterID: ins.Spec.ClusterID, + TargetVersion: ins.Spec.Config.Version, + InitialVersion: prev.Spec.Config.Version, + }) +} + +// NotifyUpgradeSucceeded notifies the metrics server that an upgrade has succeeded. +func NotifyUpgradeSucceeded(ctx context.Context, baseURL string, ins, prev *embeddedclusterv1beta1.Installation) error { + if ins.Spec.AirGap { + return nil + } + return sendEvent(ctx, "UpgradeSucceeded", baseURL, UpgradeSucceededEvent{ + ClusterID: ins.Spec.ClusterID, + TargetVersion: ins.Spec.Config.Version, + InitialVersion: prev.Spec.Config.Version, + }) +} + +// sendEvent sends the received event to the metrics server through a post request. +func sendEvent(ctx context.Context, evname, baseURL string, ev interface{}) error { + url := fmt.Sprintf("%s/embedded_cluster_metrics/%s", baseURL, evname) + + body := map[string]interface{}{"event": ev} + buf := bytes.NewBuffer(nil) + if err := json.NewEncoder(buf).Encode(body); err != nil { + return err + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, buf) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + + client := &http.Client{ + Timeout: 5 * time.Second, + } + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("failed to send event: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("failed to send event: %s", resp.Status) + } + return nil +} diff --git a/pkg/embeddedcluster/util.go b/pkg/embeddedcluster/util.go index 5a7945ea99..aaf0095521 100644 --- a/pkg/embeddedcluster/util.go +++ b/pkg/embeddedcluster/util.go @@ -68,6 +68,21 @@ func GetCurrentInstallation(ctx context.Context, kbClient kbclient.Client) (*emb return &installations[0], nil } +// GetCurrentInstallation returns the second most recent installation object from the cluster. +func GetPreviousInstallation(ctx context.Context, kbClient kbclient.Client) (*embeddedclusterv1beta1.Installation, error) { + installations, err := ListInstallations(ctx, kbClient) + if err != nil { + return nil, fmt.Errorf("failed to list installations: %w", err) + } + if len(installations) < 2 { + return nil, nil + } + sort.SliceStable(installations, func(i, j int) bool { + return installations[j].Name < installations[i].Name + }) + return &installations[1], nil +} + func ListInstallations(ctx context.Context, kbClient kbclient.Client) ([]embeddedclusterv1beta1.Installation, error) { var installationList embeddedclusterv1beta1.InstallationList if err := kbClient.List(ctx, &installationList, &kbclient.ListOptions{}); err != nil { diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index a4348efe17..569860bfbb 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -12,6 +12,7 @@ import ( "time" "github.com/pkg/errors" + embeddedclusterv1beta1 "github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1" downstreamtypes "github.com/replicatedhq/kots/pkg/api/downstream/types" "github.com/replicatedhq/kots/pkg/app" apptypes "github.com/replicatedhq/kots/pkg/app/types" @@ -53,6 +54,7 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/cache" + kbclient "sigs.k8s.io/controller-runtime/pkg/client" ) var ( @@ -933,7 +935,7 @@ func (o *Operator) reconcileDeployment(cm *corev1.ConfigMap) (finalError error) if cm.Data["requires-cluster-upgrade"] == "true" { // wait for cluster upgrade even if the embedded cluster version doesn't match yet // in order to continuously report progress to the user - if err := o.waitForClusterUpgrade(cm.Data["app-slug"]); err != nil { + if err := o.waitForClusterUpgrade(cm.Data["app-id"], cm.Data["app-slug"]); err != nil { return errors.Wrap(err, "failed to wait for cluster upgrade") } } @@ -1035,21 +1037,29 @@ func (o *Operator) reconcileDeployment(cm *corev1.ConfigMap) (finalError error) return nil } -func (o *Operator) waitForClusterUpgrade(appSlug string) error { - kbClient, err := k8sutil.GetKubeClient(context.Background()) +func (o *Operator) waitForClusterUpgrade(appID string, appSlug string) error { + ctx := context.Background() + + kbClient, err := k8sutil.GetKubeClient(ctx) if err != nil { return errors.Wrap(err, "failed to get kube client") } logger.Infof("waiting for cluster upgrade to finish") for { - ins, err := embeddedcluster.GetCurrentInstallation(context.Background(), kbClient) + ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient) if err != nil { return errors.Wrap(err, "failed to wait for embedded cluster installation") } - if embeddedcluster.InstallationSucceeded(context.Background(), ins) { + if embeddedcluster.InstallationSucceeded(ctx, ins) { + if err := o.notifyUpgradeSucceeded(ctx, kbClient, ins, appID); err != nil { + logger.Errorf("Failed to notify upgrade succeeded: %v", err) + } return nil } - if embeddedcluster.InstallationFailed(context.Background(), ins) { + if embeddedcluster.InstallationFailed(ctx, ins) { + if err := o.notifyUpgradeFailed(ctx, kbClient, ins, appID); err != nil { + logger.Errorf("Failed to notify upgrade failed: %v", err) + } if err := upgradeservicetask.SetStatusUpgradeFailed(appSlug, ins.Status.Reason); err != nil { return errors.Wrap(err, "failed to set task status to failed") } @@ -1061,3 +1071,53 @@ func (o *Operator) waitForClusterUpgrade(appSlug string) error { time.Sleep(5 * time.Second) } } + +// notifyUpgradeSucceeded sends a metrics event to the api that the upgrade succeeded. +func (o *Operator) notifyUpgradeSucceeded(ctx context.Context, kbClient kbclient.Client, ins *embeddedclusterv1beta1.Installation, appID string) error { + if ins.Spec.AirGap { + return nil + } + + license, err := o.store.GetLatestLicenseForApp(appID) + if err != nil { + return errors.Wrapf(err, "failed to get latest license for app %s", appID) + } + + prev, err := embeddedcluster.GetPreviousInstallation(ctx, kbClient) + if err != nil { + return errors.Wrap(err, "failed to get previous installation") + } else if prev == nil { + return errors.New("previous installation not found") + } + + err = embeddedcluster.NotifyUpgradeSucceeded(ctx, license.Spec.Endpoint, ins, prev) + if err != nil { + return errors.Wrap(err, "failed to send event") + } + return nil +} + +// notifyUpgradeFailed sends a metrics event to the api that the upgrade failed. +func (o *Operator) notifyUpgradeFailed(ctx context.Context, kbClient kbclient.Client, ins *embeddedclusterv1beta1.Installation, appID string) error { + if ins.Spec.AirGap { + return nil + } + + license, err := o.store.GetLatestLicenseForApp(appID) + if err != nil { + return errors.Wrapf(err, "failed to get latest license for app %s", appID) + } + + prev, err := embeddedcluster.GetPreviousInstallation(ctx, kbClient) + if err != nil { + return errors.Wrap(err, "failed to get previous installation") + } else if prev == nil { + return errors.New("previous installation not found") + } + + err = embeddedcluster.NotifyUpgradeFailed(ctx, license.Spec.Endpoint, ins, prev) + if err != nil { + return errors.Wrap(err, "failed to send event") + } + return nil +} diff --git a/pkg/upgradeservice/deploy/deploy.go b/pkg/upgradeservice/deploy/deploy.go index 1ffae96153..39a56c5a44 100644 --- a/pkg/upgradeservice/deploy/deploy.go +++ b/pkg/upgradeservice/deploy/deploy.go @@ -23,6 +23,7 @@ import ( corev1 "k8s.io/api/core/v1" kuberneteserrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kbclient "sigs.k8s.io/controller-runtime/pkg/client" ) type CanDeployOptions struct { @@ -119,6 +120,9 @@ func Deploy(opts DeployOptions) error { go func() (finalError error) { defer func() { if finalError != nil { + if err := notifyUpgradeFailed(context.Background(), kbClient, opts); err != nil { + logger.Errorf("Failed to notify upgrade failed: %v", err) + } if err := task.SetStatusUpgradeFailed(opts.Params.AppSlug, finalError.Error()); err != nil { logger.Error(errors.Wrap(err, "failed to set task status to upgrade failed")) } @@ -129,6 +133,10 @@ func Deploy(opts DeployOptions) error { defer close(finishedCh) tasks.StartTicker(task.GetID(opts.Params.AppSlug), finishedCh) + if err := notifyUpgradeStarted(context.Background(), kbClient, opts); err != nil { + logger.Errorf("Failed to notify upgrade started: %v", err) + } + if err := embeddedcluster.StartClusterUpgrade(context.Background(), opts.KotsKinds, opts.RegistrySettings); err != nil { return errors.Wrap(err, "failed to start cluster upgrade") } @@ -150,6 +158,56 @@ func Deploy(opts DeployOptions) error { return nil } +// notifyUpgradeStarted sends a metrics event to the api that the upgrade started. +func notifyUpgradeStarted(ctx context.Context, kbClient kbclient.Client, opts DeployOptions) error { + ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient) + if err != nil { + return fmt.Errorf("failed to get current installation: %w", err) + } + + if ins.Spec.AirGap { + return nil + } + + prev, err := embeddedcluster.GetPreviousInstallation(ctx, kbClient) + if err != nil { + return errors.Wrap(err, "failed to get previous installation") + } else if prev == nil { + return errors.New("previous installation not found") + } + + err = embeddedcluster.NotifyUpgradeStarted(ctx, opts.KotsKinds.License.Spec.Endpoint, ins, prev) + if err != nil { + return errors.Wrap(err, "failed to send event") + } + return nil +} + +// notifyUpgradeFailed sends a metrics event to the api that the upgrade failed. +func notifyUpgradeFailed(ctx context.Context, kbClient kbclient.Client, opts DeployOptions) error { + ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient) + if err != nil { + return fmt.Errorf("failed to get current installation: %w", err) + } + + if ins.Spec.AirGap { + return nil + } + + prev, err := embeddedcluster.GetPreviousInstallation(ctx, kbClient) + if err != nil { + return errors.Wrap(err, "failed to get previous installation") + } else if prev == nil { + return errors.New("previous installation not found") + } + + err = embeddedcluster.NotifyUpgradeFailed(ctx, opts.KotsKinds.License.Spec.Endpoint, ins, prev) + if err != nil { + return errors.Wrap(err, "failed to send event") + } + return nil +} + type createDeploymentOptions struct { ctx context.Context isSkipPreflights bool From d37d3ed124e545e1a9c0d82b0b5509dd150e22a3 Mon Sep 17 00:00:00 2001 From: Ethan Mosbaugh Date: Wed, 20 Nov 2024 14:57:28 -0800 Subject: [PATCH 2/7] f --- pkg/embeddedcluster/metrics.go | 6 +++++- pkg/operator/operator.go | 6 ++++-- pkg/upgradeservice/deploy/deploy.go | 6 +++--- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pkg/embeddedcluster/metrics.go b/pkg/embeddedcluster/metrics.go index 695b86693a..03fd113cfe 100644 --- a/pkg/embeddedcluster/metrics.go +++ b/pkg/embeddedcluster/metrics.go @@ -9,6 +9,7 @@ import ( "time" embeddedclusterv1beta1 "github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1" + "github.com/replicatedhq/kots/pkg/logger" ) // UpgradeStartedEvent is send back home when the upgrade starts. @@ -46,7 +47,7 @@ func NotifyUpgradeStarted(ctx context.Context, baseURL string, ins, prev *embedd } // NotifyUpgradeFailed notifies the metrics server that an upgrade has failed. -func NotifyUpgradeFailed(ctx context.Context, baseURL string, ins, prev *embeddedclusterv1beta1.Installation) error { +func NotifyUpgradeFailed(ctx context.Context, baseURL string, ins, prev *embeddedclusterv1beta1.Installation, reason string) error { if ins.Spec.AirGap { return nil } @@ -54,6 +55,7 @@ func NotifyUpgradeFailed(ctx context.Context, baseURL string, ins, prev *embedde ClusterID: ins.Spec.ClusterID, TargetVersion: ins.Spec.Config.Version, InitialVersion: prev.Spec.Config.Version, + Reason: reason, }) } @@ -73,6 +75,8 @@ func NotifyUpgradeSucceeded(ctx context.Context, baseURL string, ins, prev *embe func sendEvent(ctx context.Context, evname, baseURL string, ev interface{}) error { url := fmt.Sprintf("%s/embedded_cluster_metrics/%s", baseURL, evname) + logger.Infof("Sending event %s to %s", evname, url) + body := map[string]interface{}{"event": ev} buf := bytes.NewBuffer(nil) if err := json.NewEncoder(buf).Encode(body); err != nil { diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index 569860bfbb..e5def5d443 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -1044,19 +1044,21 @@ func (o *Operator) waitForClusterUpgrade(appID string, appSlug string) error { if err != nil { return errors.Wrap(err, "failed to get kube client") } - logger.Infof("waiting for cluster upgrade to finish") + logger.Infof("Waiting for cluster upgrade to finish") for { ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient) if err != nil { return errors.Wrap(err, "failed to wait for embedded cluster installation") } if embeddedcluster.InstallationSucceeded(ctx, ins) { + logger.Infof("Cluster upgrade succeeded") if err := o.notifyUpgradeSucceeded(ctx, kbClient, ins, appID); err != nil { logger.Errorf("Failed to notify upgrade succeeded: %v", err) } return nil } if embeddedcluster.InstallationFailed(ctx, ins) { + logger.Infof("Cluster upgrade failed") if err := o.notifyUpgradeFailed(ctx, kbClient, ins, appID); err != nil { logger.Errorf("Failed to notify upgrade failed: %v", err) } @@ -1115,7 +1117,7 @@ func (o *Operator) notifyUpgradeFailed(ctx context.Context, kbClient kbclient.Cl return errors.New("previous installation not found") } - err = embeddedcluster.NotifyUpgradeFailed(ctx, license.Spec.Endpoint, ins, prev) + err = embeddedcluster.NotifyUpgradeFailed(ctx, license.Spec.Endpoint, ins, prev, ins.Status.Reason) if err != nil { return errors.Wrap(err, "failed to send event") } diff --git a/pkg/upgradeservice/deploy/deploy.go b/pkg/upgradeservice/deploy/deploy.go index 39a56c5a44..427b3adba6 100644 --- a/pkg/upgradeservice/deploy/deploy.go +++ b/pkg/upgradeservice/deploy/deploy.go @@ -120,7 +120,7 @@ func Deploy(opts DeployOptions) error { go func() (finalError error) { defer func() { if finalError != nil { - if err := notifyUpgradeFailed(context.Background(), kbClient, opts); err != nil { + if err := notifyUpgradeFailed(context.Background(), kbClient, opts, finalError.Error()); err != nil { logger.Errorf("Failed to notify upgrade failed: %v", err) } if err := task.SetStatusUpgradeFailed(opts.Params.AppSlug, finalError.Error()); err != nil { @@ -184,7 +184,7 @@ func notifyUpgradeStarted(ctx context.Context, kbClient kbclient.Client, opts De } // notifyUpgradeFailed sends a metrics event to the api that the upgrade failed. -func notifyUpgradeFailed(ctx context.Context, kbClient kbclient.Client, opts DeployOptions) error { +func notifyUpgradeFailed(ctx context.Context, kbClient kbclient.Client, opts DeployOptions, reason string) error { ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient) if err != nil { return fmt.Errorf("failed to get current installation: %w", err) @@ -201,7 +201,7 @@ func notifyUpgradeFailed(ctx context.Context, kbClient kbclient.Client, opts Dep return errors.New("previous installation not found") } - err = embeddedcluster.NotifyUpgradeFailed(ctx, opts.KotsKinds.License.Spec.Endpoint, ins, prev) + err = embeddedcluster.NotifyUpgradeFailed(ctx, opts.KotsKinds.License.Spec.Endpoint, ins, prev, reason) if err != nil { return errors.Wrap(err, "failed to send event") } From 58829b0ee8d520cfe78f701b30249da4f0f2a028 Mon Sep 17 00:00:00 2001 From: Ethan Mosbaugh Date: Thu, 21 Nov 2024 12:21:44 -0800 Subject: [PATCH 3/7] f --- pkg/embeddedcluster/upgrade.go | 6 ++++++ pkg/upgradeservice/deploy/deploy.go | 29 ----------------------------- 2 files changed, 6 insertions(+), 29 deletions(-) diff --git a/pkg/embeddedcluster/upgrade.go b/pkg/embeddedcluster/upgrade.go index 918f0b2372..dae1f3b163 100644 --- a/pkg/embeddedcluster/upgrade.go +++ b/pkg/embeddedcluster/upgrade.go @@ -22,6 +22,7 @@ import ( dockerregistrytypes "github.com/replicatedhq/kots/pkg/docker/registry/types" "github.com/replicatedhq/kots/pkg/imageutil" "github.com/replicatedhq/kots/pkg/k8sutil" + "github.com/replicatedhq/kots/pkg/logger" registrytypes "github.com/replicatedhq/kots/pkg/registry/types" "github.com/replicatedhq/kots/pkg/util" kotsv1beta1 "github.com/replicatedhq/kotskinds/apis/kots/v1beta1" @@ -75,6 +76,11 @@ func startClusterUpgrade( log.Printf("Starting cluster upgrade to version %s...", newcfg.Version) + // We cannot notify the upgrade started until the new install is created + if err := NotifyUpgradeStarted(ctx, license.Spec.Endpoint, newInstall, current); err != nil { + logger.Errorf("Failed to notify upgrade started: %v", err) + } + err = runClusterUpgrade(ctx, k8sClient, newInstall, registrySettings, license, versionLabel) if err != nil { return fmt.Errorf("run cluster upgrade: %w", err) diff --git a/pkg/upgradeservice/deploy/deploy.go b/pkg/upgradeservice/deploy/deploy.go index 427b3adba6..3bd84c42d7 100644 --- a/pkg/upgradeservice/deploy/deploy.go +++ b/pkg/upgradeservice/deploy/deploy.go @@ -133,10 +133,6 @@ func Deploy(opts DeployOptions) error { defer close(finishedCh) tasks.StartTicker(task.GetID(opts.Params.AppSlug), finishedCh) - if err := notifyUpgradeStarted(context.Background(), kbClient, opts); err != nil { - logger.Errorf("Failed to notify upgrade started: %v", err) - } - if err := embeddedcluster.StartClusterUpgrade(context.Background(), opts.KotsKinds, opts.RegistrySettings); err != nil { return errors.Wrap(err, "failed to start cluster upgrade") } @@ -158,31 +154,6 @@ func Deploy(opts DeployOptions) error { return nil } -// notifyUpgradeStarted sends a metrics event to the api that the upgrade started. -func notifyUpgradeStarted(ctx context.Context, kbClient kbclient.Client, opts DeployOptions) error { - ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient) - if err != nil { - return fmt.Errorf("failed to get current installation: %w", err) - } - - if ins.Spec.AirGap { - return nil - } - - prev, err := embeddedcluster.GetPreviousInstallation(ctx, kbClient) - if err != nil { - return errors.Wrap(err, "failed to get previous installation") - } else if prev == nil { - return errors.New("previous installation not found") - } - - err = embeddedcluster.NotifyUpgradeStarted(ctx, opts.KotsKinds.License.Spec.Endpoint, ins, prev) - if err != nil { - return errors.Wrap(err, "failed to send event") - } - return nil -} - // notifyUpgradeFailed sends a metrics event to the api that the upgrade failed. func notifyUpgradeFailed(ctx context.Context, kbClient kbclient.Client, opts DeployOptions, reason string) error { ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient) From a36dba649c5d1732043902088b388dc9291b4af3 Mon Sep 17 00:00:00 2001 From: Ethan Mosbaugh Date: Tue, 26 Nov 2024 07:08:32 -0800 Subject: [PATCH 4/7] f --- pkg/embeddedcluster/metrics.go | 4 +++- pkg/embeddedcluster/upgrade.go | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/embeddedcluster/metrics.go b/pkg/embeddedcluster/metrics.go index 03fd113cfe..4523b0df84 100644 --- a/pkg/embeddedcluster/metrics.go +++ b/pkg/embeddedcluster/metrics.go @@ -17,6 +17,7 @@ type UpgradeStartedEvent struct { ClusterID string `json:"clusterID"` TargetVersion string `json:"targetVersion"` InitialVersion string `json:"initialVersion"` + AppVersion string `json:"appVersion"` } // UpgradeFailedEvent is send back home when the upgrade fails. @@ -35,7 +36,7 @@ type UpgradeSucceededEvent struct { } // NotifyUpgradeStarted notifies the metrics server that an upgrade has started. -func NotifyUpgradeStarted(ctx context.Context, baseURL string, ins, prev *embeddedclusterv1beta1.Installation) error { +func NotifyUpgradeStarted(ctx context.Context, baseURL string, ins, prev *embeddedclusterv1beta1.Installation, versionLabel string) error { if ins.Spec.AirGap { return nil } @@ -43,6 +44,7 @@ func NotifyUpgradeStarted(ctx context.Context, baseURL string, ins, prev *embedd ClusterID: ins.Spec.ClusterID, TargetVersion: ins.Spec.Config.Version, InitialVersion: prev.Spec.Config.Version, + AppVersion: versionLabel, }) } diff --git a/pkg/embeddedcluster/upgrade.go b/pkg/embeddedcluster/upgrade.go index dae1f3b163..796ba22f56 100644 --- a/pkg/embeddedcluster/upgrade.go +++ b/pkg/embeddedcluster/upgrade.go @@ -77,7 +77,7 @@ func startClusterUpgrade( log.Printf("Starting cluster upgrade to version %s...", newcfg.Version) // We cannot notify the upgrade started until the new install is created - if err := NotifyUpgradeStarted(ctx, license.Spec.Endpoint, newInstall, current); err != nil { + if err := NotifyUpgradeStarted(ctx, license.Spec.Endpoint, newInstall, current, versionLabel); err != nil { logger.Errorf("Failed to notify upgrade started: %v", err) } From 65867a00d9742dede692068f59ef064de21c2787 Mon Sep 17 00:00:00 2001 From: Ethan Mosbaugh Date: Tue, 26 Nov 2024 11:10:00 -0800 Subject: [PATCH 5/7] increase preflight timeout --- e2e/playwright/tests/@smoke-test/test.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/playwright/tests/@smoke-test/test.spec.ts b/e2e/playwright/tests/@smoke-test/test.spec.ts index 214517f7e0..cc7ea645e0 100644 --- a/e2e/playwright/tests/@smoke-test/test.spec.ts +++ b/e2e/playwright/tests/@smoke-test/test.spec.ts @@ -27,7 +27,7 @@ test("smoke test", async ({ page }) => { ); await page.getByRole("button", { name: "Continue" }).click(); await expect(page.locator("#app")).toContainText("Results", { - timeout: 30000, + timeout: 60 * 1000, }); await expect(page.locator("#app")).toContainText("Sequence is 0"); await page.getByRole("button", { name: "Deploy" }).click(); From ce04d56157a4f1e1da4204bc4c936b2bca4e5176 Mon Sep 17 00:00:00 2001 From: Ethan Mosbaugh Date: Tue, 26 Nov 2024 13:30:35 -0800 Subject: [PATCH 6/7] feedback --- pkg/embeddedcluster/upgrade.go | 5 ++++- pkg/operator/operator.go | 12 ++++++------ pkg/upgradeservice/deploy/deploy.go | 10 +++++----- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/pkg/embeddedcluster/upgrade.go b/pkg/embeddedcluster/upgrade.go index 796ba22f56..2868d0cc33 100644 --- a/pkg/embeddedcluster/upgrade.go +++ b/pkg/embeddedcluster/upgrade.go @@ -76,13 +76,16 @@ func startClusterUpgrade( log.Printf("Starting cluster upgrade to version %s...", newcfg.Version) - // We cannot notify the upgrade started until the new install is created + // We cannot notify the upgrade started until the new install is available if err := NotifyUpgradeStarted(ctx, license.Spec.Endpoint, newInstall, current, versionLabel); err != nil { logger.Errorf("Failed to notify upgrade started: %v", err) } err = runClusterUpgrade(ctx, k8sClient, newInstall, registrySettings, license, versionLabel) if err != nil { + if err := NotifyUpgradeFailed(ctx, license.Spec.Endpoint, newInstall, current, err.Error()); err != nil { + logger.Errorf("Failed to notify upgrade failed: %v", err) + } return fmt.Errorf("run cluster upgrade: %w", err) } diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index e5def5d443..2e750b5ef8 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -1052,14 +1052,14 @@ func (o *Operator) waitForClusterUpgrade(appID string, appSlug string) error { } if embeddedcluster.InstallationSucceeded(ctx, ins) { logger.Infof("Cluster upgrade succeeded") - if err := o.notifyUpgradeSucceeded(ctx, kbClient, ins, appID); err != nil { + if err := o.notifyClusterUpgradeSucceeded(ctx, kbClient, ins, appID); err != nil { logger.Errorf("Failed to notify upgrade succeeded: %v", err) } return nil } if embeddedcluster.InstallationFailed(ctx, ins) { logger.Infof("Cluster upgrade failed") - if err := o.notifyUpgradeFailed(ctx, kbClient, ins, appID); err != nil { + if err := o.notifyClusterUpgradeFailed(ctx, kbClient, ins, appID); err != nil { logger.Errorf("Failed to notify upgrade failed: %v", err) } if err := upgradeservicetask.SetStatusUpgradeFailed(appSlug, ins.Status.Reason); err != nil { @@ -1074,8 +1074,8 @@ func (o *Operator) waitForClusterUpgrade(appID string, appSlug string) error { } } -// notifyUpgradeSucceeded sends a metrics event to the api that the upgrade succeeded. -func (o *Operator) notifyUpgradeSucceeded(ctx context.Context, kbClient kbclient.Client, ins *embeddedclusterv1beta1.Installation, appID string) error { +// notifyClusterUpgradeSucceeded sends a metrics event to the api that the upgrade succeeded. +func (o *Operator) notifyClusterUpgradeSucceeded(ctx context.Context, kbClient kbclient.Client, ins *embeddedclusterv1beta1.Installation, appID string) error { if ins.Spec.AirGap { return nil } @@ -1099,8 +1099,8 @@ func (o *Operator) notifyUpgradeSucceeded(ctx context.Context, kbClient kbclient return nil } -// notifyUpgradeFailed sends a metrics event to the api that the upgrade failed. -func (o *Operator) notifyUpgradeFailed(ctx context.Context, kbClient kbclient.Client, ins *embeddedclusterv1beta1.Installation, appID string) error { +// notifyClusterUpgradeFailed sends a metrics event to the api that the upgrade failed. +func (o *Operator) notifyClusterUpgradeFailed(ctx context.Context, kbClient kbclient.Client, ins *embeddedclusterv1beta1.Installation, appID string) error { if ins.Spec.AirGap { return nil } diff --git a/pkg/upgradeservice/deploy/deploy.go b/pkg/upgradeservice/deploy/deploy.go index 3bd84c42d7..6ca7b3d7da 100644 --- a/pkg/upgradeservice/deploy/deploy.go +++ b/pkg/upgradeservice/deploy/deploy.go @@ -120,9 +120,6 @@ func Deploy(opts DeployOptions) error { go func() (finalError error) { defer func() { if finalError != nil { - if err := notifyUpgradeFailed(context.Background(), kbClient, opts, finalError.Error()); err != nil { - logger.Errorf("Failed to notify upgrade failed: %v", err) - } if err := task.SetStatusUpgradeFailed(opts.Params.AppSlug, finalError.Error()); err != nil { logger.Error(errors.Wrap(err, "failed to set task status to upgrade failed")) } @@ -145,6 +142,9 @@ func Deploy(opts DeployOptions) error { tgzArchiveKey: tgzArchiveKey, requiresClusterUpgrade: true, }); err != nil { + if err := notifyClusterUpgradeFailed(context.Background(), kbClient, opts, finalError.Error()); err != nil { + logger.Errorf("Failed to notify upgrade failed: %v", err) + } return errors.Wrap(err, "failed to create deployment") } @@ -154,8 +154,8 @@ func Deploy(opts DeployOptions) error { return nil } -// notifyUpgradeFailed sends a metrics event to the api that the upgrade failed. -func notifyUpgradeFailed(ctx context.Context, kbClient kbclient.Client, opts DeployOptions, reason string) error { +// notifyClusterUpgradeFailed sends a metrics event to the api that the upgrade failed. +func notifyClusterUpgradeFailed(ctx context.Context, kbClient kbclient.Client, opts DeployOptions, reason string) error { ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient) if err != nil { return fmt.Errorf("failed to get current installation: %w", err) From 6547471f190c6a47898cfa98c8e37ab07e9b81bc Mon Sep 17 00:00:00 2001 From: Ethan Mosbaugh Date: Tue, 26 Nov 2024 13:34:57 -0800 Subject: [PATCH 7/7] explain --- pkg/upgradeservice/deploy/deploy.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/upgradeservice/deploy/deploy.go b/pkg/upgradeservice/deploy/deploy.go index 6ca7b3d7da..0c00795735 100644 --- a/pkg/upgradeservice/deploy/deploy.go +++ b/pkg/upgradeservice/deploy/deploy.go @@ -142,6 +142,8 @@ func Deploy(opts DeployOptions) error { tgzArchiveKey: tgzArchiveKey, requiresClusterUpgrade: true, }); err != nil { + // The operator is responsible for notifying of upgrade success/failure using the deployment. + // If we cannot create the deployment, the operator cannot take over and we need to notify of failure here. if err := notifyClusterUpgradeFailed(context.Background(), kbClient, opts, finalError.Error()); err != nil { logger.Errorf("Failed to notify upgrade failed: %v", err) }