Skip to content

Commit

Permalink
feat: reporting back individual node upgrades
Browse files Browse the repository at this point in the history
  • Loading branch information
ricardomaraschini committed Oct 4, 2023
1 parent 0b0442f commit 771dbeb
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 7 deletions.
7 changes: 7 additions & 0 deletions cmd/helmvm/join.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ var joinCommand = &cli.Command{
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
fpath := defaults.PathToConfig(".cluster-id")
cid := hvmtoken.ClusterID.String()
if err := os.WriteFile(fpath, []byte(cid), 0644); err != nil {
err := fmt.Errorf("unable to write cluster id to disk: %w", err)
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
metrics.ReportJoinSucceeded(c.Context, hvmtoken.ClusterID)
return nil
},
Expand Down
33 changes: 26 additions & 7 deletions cmd/helmvm/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/replicatedhq/helmvm/pkg/addons"
"github.com/replicatedhq/helmvm/pkg/defaults"
"github.com/replicatedhq/helmvm/pkg/goods"
"github.com/replicatedhq/helmvm/pkg/metrics"
"github.com/replicatedhq/helmvm/pkg/preflights"
"github.com/replicatedhq/helmvm/pkg/prompts"
)
Expand Down Expand Up @@ -103,34 +104,49 @@ var upgradeCommand = &cli.Command{
},
},
Action: func(c *cli.Context) error {
metrics.ReportNodeUpgradeStarted(c.Context)
if err := canRunUpgrade(c); err != nil {
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
logrus.Infof("Materializing binaries")
if err := goods.Materialize(); err != nil {
return fmt.Errorf("unable to materialize binaries: %w", err)
err := fmt.Errorf("unable to materialize binaries: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
if err := runHostPreflightsLocally(c); err != nil {
return fmt.Errorf("unable to run host preflights locally: %w", err)
err := fmt.Errorf("unable to run host preflights locally: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
logrus.Infof("Stopping %s", defaults.BinaryName())
if err := stopHelmVM(); err != nil {
return fmt.Errorf("unable to stop: %w", err)
err := fmt.Errorf("unable to stop: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
logrus.Infof("Installing binary")
if err := installK0sBinary(); err != nil {
return fmt.Errorf("unable to install k0s binary: %w", err)
err := fmt.Errorf("unable to install k0s binary: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
logrus.Infof("Starting service")
if err := startK0sService(); err != nil {
return fmt.Errorf("unable to start service: %w", err)
err := fmt.Errorf("unable to start k0s service: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
kcfg := defaults.PathToConfig("kubeconfig")
if _, err := os.Stat(kcfg); err != nil {
if os.IsNotExist(err) {
metrics.ReportNodeUpgradeSucceeded(c.Context)
return nil
}
return fmt.Errorf("unable to stat kubeconfig: %w", err)
err := fmt.Errorf("unable to read kubeconfig: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
os.Setenv("KUBECONFIG", kcfg)
logrus.Infof("Upgrading addons")
Expand All @@ -142,8 +158,11 @@ var upgradeCommand = &cli.Command{
opts = append(opts, addons.WithoutAddon(addon))
}
if err := addons.NewApplier(opts...).Apply(c.Context); err != nil {
return fmt.Errorf("unable to apply addons: %w", err)
err := fmt.Errorf("unable to apply addons: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
metrics.ReportNodeUpgradeSucceeded(c.Context)
logrus.Infof("Upgrade complete")
return nil
},
Expand Down
27 changes: 27 additions & 0 deletions pkg/metrics/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,30 @@ type JoinFailed struct {
func (e JoinFailed) Title() string {
return "JoinFailed"
}

// NodeUpgradeStarted event is sent back home when a node upgrade
// starts. It is declared on top of JoinStarted and therefore carries the
// same fields; because a newly defined type does not inherit methods, it
// declares its own Title below.
type NodeUpgradeStarted JoinStarted

// Title returns the name of the event, "NodeUpgradeStarted".
func (e NodeUpgradeStarted) Title() string {
return "NodeUpgradeStarted"
}

// NodeUpgradeSucceeded event is sent back home when a node upgrade
// succeeds. It is declared on top of NodeUpgradeStarted and therefore
// carries the same fields; it declares its own Title below because
// methods are not inherited by a newly defined type.
type NodeUpgradeSucceeded NodeUpgradeStarted

// Title returns the name of the event, "NodeUpgradeSucceeded".
func (e NodeUpgradeSucceeded) Title() string {
return "NodeUpgradeSucceeded"
}

// NodeUpgradeFailed event is sent back home when a node upgrade
// fails. It is declared on top of JoinFailed and therefore carries the
// same fields; it declares its own Title below because methods are not
// inherited by a newly defined type.
type NodeUpgradeFailed JoinFailed

// Title returns the name of the event, "NodeUpgradeFailed".
func (e NodeUpgradeFailed) Title() string {
return "NodeUpgradeFailed"
}
30 changes: 30 additions & 0 deletions pkg/metrics/reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,33 @@ func ReportApplyFinished(c *cli.Context, err error) {
}
ReportInstallationSuceeded(ctx)
}

// ReportNodeUpgradeStarted sends a NodeUpgradeStarted event for the local
// node. When the hostname cannot be determined a warning is logged and the
// node is reported as "unknown".
func ReportNodeUpgradeStarted(ctx context.Context) {
	node, herr := os.Hostname()
	if herr != nil {
		logrus.Warnf("unable to get hostname: %s", herr)
		node = "unknown"
	}
	event := NodeUpgradeStarted{
		ClusterID: ClusterID(),
		NodeName:  node,
	}
	Send(ctx, event)
}

// ReportNodeUpgradeSucceeded sends a NodeUpgradeSucceeded event for the
// local node. When the hostname cannot be determined a warning is logged
// and the node is reported as "unknown".
func ReportNodeUpgradeSucceeded(ctx context.Context) {
	node, herr := os.Hostname()
	if herr != nil {
		logrus.Warnf("unable to get hostname: %s", herr)
		node = "unknown"
	}
	event := NodeUpgradeSucceeded{
		ClusterID: ClusterID(),
		NodeName:  node,
	}
	Send(ctx, event)
}

// ReportNodeUpgradeFailed reports that a node upgrade has failed. The
// exterr argument is the error that caused the failure; its message is
// sent as the event's Reason. When the hostname cannot be determined a
// warning is logged and the node is reported as "unknown".
func ReportNodeUpgradeFailed(ctx context.Context, exterr error) {
	hostname, err := os.Hostname()
	if err != nil {
		logrus.Warnf("unable to get hostname: %s", err)
		hostname = "unknown"
	}
	// Reporting is best effort and must never crash the upgrade itself:
	// calling Error() on a nil error panics, so fall back to a placeholder
	// reason when no error was supplied.
	reason := "unknown"
	if exterr != nil {
		reason = exterr.Error()
	}
	Send(ctx, NodeUpgradeFailed{
		ClusterID: ClusterID(),
		NodeName:  hostname,
		Reason:    reason,
	})
}
22 changes: 22 additions & 0 deletions pkg/metrics/sender_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,28 @@ func TestSend(t *testing.T) {
Reason: "bar",
},
},
{
name: "NodeUpgradeStarted",
event: NodeUpgradeStarted{
ClusterID: uuid.New(),
NodeName: "foo",
},
},
{
name: "NodeUpgradeSucceeded",
event: NodeUpgradeSucceeded{
ClusterID: uuid.New(),
NodeName: "foo",
},
},
{
name: "NodeUpgradeFailed",
event: NodeUpgradeFailed{
ClusterID: uuid.New(),
NodeName: "foo",
Reason: "bar",
},
},
} {
t.Run(tt.name, func(t *testing.T) {
payload := map[string]Event{"event": tt.event}
Expand Down

0 comments on commit 771dbeb

Please sign in to comment.