From 771dbebf7655d59aff84a4e47c7a7bd2d75f3125 Mon Sep 17 00:00:00 2001 From: Ricardo Maraschini Date: Tue, 3 Oct 2023 20:53:33 +0200 Subject: [PATCH] feat: reporting back individual node upgrades --- cmd/helmvm/join.go | 7 +++++++ cmd/helmvm/upgrade.go | 33 ++++++++++++++++++++++++++------- pkg/metrics/events.go | 27 +++++++++++++++++++++++++++ pkg/metrics/reporter.go | 30 ++++++++++++++++++++++++++++++ pkg/metrics/sender_test.go | 22 ++++++++++++++++++++++ 5 files changed, 112 insertions(+), 7 deletions(-) diff --git a/cmd/helmvm/join.go b/cmd/helmvm/join.go index 7af486ab5..ef4ebcbb6 100644 --- a/cmd/helmvm/join.go +++ b/cmd/helmvm/join.go @@ -75,6 +75,13 @@ var joinCommand = &cli.Command{ metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) return err } + fpath := defaults.PathToConfig(".cluster-id") + cid := hvmtoken.ClusterID.String() + if err := os.WriteFile(fpath, []byte(cid), 0644); err != nil { + err := fmt.Errorf("unable to write cluster id to disk: %w", err) + metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) + return err + } metrics.ReportJoinSucceeded(c.Context, hvmtoken.ClusterID) return nil }, diff --git a/cmd/helmvm/upgrade.go b/cmd/helmvm/upgrade.go index a137ad0d9..03eb7e94a 100644 --- a/cmd/helmvm/upgrade.go +++ b/cmd/helmvm/upgrade.go @@ -13,6 +13,7 @@ import ( "github.com/replicatedhq/helmvm/pkg/addons" "github.com/replicatedhq/helmvm/pkg/defaults" "github.com/replicatedhq/helmvm/pkg/goods" + "github.com/replicatedhq/helmvm/pkg/metrics" "github.com/replicatedhq/helmvm/pkg/preflights" "github.com/replicatedhq/helmvm/pkg/prompts" ) @@ -103,34 +104,49 @@ var upgradeCommand = &cli.Command{ }, }, Action: func(c *cli.Context) error { + metrics.ReportNodeUpgradeStarted(c.Context) if err := canRunUpgrade(c); err != nil { + metrics.ReportNodeUpgradeFailed(c.Context, err) return err } logrus.Infof("Materializing binaries") if err := goods.Materialize(); err != nil { - return fmt.Errorf("unable to materialize binaries: %w", err) + err 
:= fmt.Errorf("unable to materialize binaries: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } if err := runHostPreflightsLocally(c); err != nil { - return fmt.Errorf("unable to run host preflights locally: %w", err) + err := fmt.Errorf("unable to run host preflights locally: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } logrus.Infof("Stopping %s", defaults.BinaryName()) if err := stopHelmVM(); err != nil { - return fmt.Errorf("unable to stop: %w", err) + err := fmt.Errorf("unable to stop: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } logrus.Infof("Installing binary") if err := installK0sBinary(); err != nil { - return fmt.Errorf("unable to install k0s binary: %w", err) + err := fmt.Errorf("unable to install k0s binary: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } logrus.Infof("Starting service") if err := startK0sService(); err != nil { - return fmt.Errorf("unable to start service: %w", err) + err := fmt.Errorf("unable to start k0s service: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } kcfg := defaults.PathToConfig("kubeconfig") if _, err := os.Stat(kcfg); err != nil { if os.IsNotExist(err) { + metrics.ReportNodeUpgradeSucceeded(c.Context) return nil } - return fmt.Errorf("unable to stat kubeconfig: %w", err) + err := fmt.Errorf("unable to read kubeconfig: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } os.Setenv("KUBECONFIG", kcfg) logrus.Infof("Upgrading addons") @@ -142,8 +158,11 @@ var upgradeCommand = &cli.Command{ opts = append(opts, addons.WithoutAddon(addon)) } if err := addons.NewApplier(opts...).Apply(c.Context); err != nil { - return fmt.Errorf("unable to apply addons: %w", err) + err := fmt.Errorf("unable to apply addons: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } + metrics.ReportNodeUpgradeSucceeded(c.Context) logrus.Infof("Upgrade complete") return 
nil }, diff --git a/pkg/metrics/events.go b/pkg/metrics/events.go index 897843ee3..88215f26b 100644 --- a/pkg/metrics/events.go +++ b/pkg/metrics/events.go @@ -100,3 +100,30 @@ type JoinFailed struct { func (e JoinFailed) Title() string { return "JoinFailed" } + +// NodeUpgradeStarted event is sent back home when a node upgrade +// starts. +type NodeUpgradeStarted JoinStarted + +// Title returns the name of the event. +func (e NodeUpgradeStarted) Title() string { + return "NodeUpgradeStarted" +} + +// NodeUpgradeSucceeded event is sent back home when a node upgrade +// succeeds. +type NodeUpgradeSucceeded NodeUpgradeStarted + +// Title returns the name of the event. +func (e NodeUpgradeSucceeded) Title() string { + return "NodeUpgradeSucceeded" +} + +// NodeUpgradeFailed event is sent back home when a node upgrade +// fails. +type NodeUpgradeFailed JoinFailed + +// Title returns the name of the event. +func (e NodeUpgradeFailed) Title() string { + return "NodeUpgradeFailed" +} diff --git a/pkg/metrics/reporter.go b/pkg/metrics/reporter.go index 9c88b6086..dbfae3f5c 100644 --- a/pkg/metrics/reporter.go +++ b/pkg/metrics/reporter.go @@ -178,3 +178,33 @@ func ReportApplyFinished(c *cli.Context, err error) { } ReportInstallationSuceeded(ctx) } + +// ReportNodeUpgradeStarted reports that a node upgrade has started. +func ReportNodeUpgradeStarted(ctx context.Context) { + hostname, err := os.Hostname() + if err != nil { + logrus.Warnf("unable to get hostname: %s", err) + hostname = "unknown" + } + Send(ctx, NodeUpgradeStarted{ClusterID(), hostname}) +} + +// ReportNodeUpgradeSucceeded reports that a node upgrade has finished successfully. +func ReportNodeUpgradeSucceeded(ctx context.Context) { + hostname, err := os.Hostname() + if err != nil { + logrus.Warnf("unable to get hostname: %s", err) + hostname = "unknown" + } + Send(ctx, NodeUpgradeSucceeded{ClusterID(), hostname}) +} + +// ReportNodeUpgradeFailed reports that a node upgrade has failed. 
+func ReportNodeUpgradeFailed(ctx context.Context, exterr error) { + hostname, err := os.Hostname() + if err != nil { + logrus.Warnf("unable to get hostname: %s", err) + hostname = "unknown" + } + Send(ctx, NodeUpgradeFailed{ClusterID(), hostname, exterr.Error()}) +} diff --git a/pkg/metrics/sender_test.go b/pkg/metrics/sender_test.go index 57e16db58..43f90eccb 100644 --- a/pkg/metrics/sender_test.go +++ b/pkg/metrics/sender_test.go @@ -89,6 +89,28 @@ func TestSend(t *testing.T) { Reason: "bar", }, }, + { + name: "NodeUpgradeStarted", + event: NodeUpgradeStarted{ + ClusterID: uuid.New(), + NodeName: "foo", + }, + }, + { + name: "NodeUpgradeSucceeded", + event: NodeUpgradeSucceeded{ + ClusterID: uuid.New(), + NodeName: "foo", + }, + }, + { + name: "NodeUpgradeFailed", + event: NodeUpgradeFailed{ + ClusterID: uuid.New(), + NodeName: "foo", + Reason: "bar", + }, + }, } { t.Run(tt.name, func(t *testing.T) { payload := map[string]Event{"event": tt.event}