Skip to content

Commit

Permalink
feat: reporting back installation and upgrade metrics (#92)
Browse files Browse the repository at this point in the history
* feat: reporting installation and upgrade metrics

reporting back installation and upgrade data.

* chore: renamed event properties

* chore: small fixes

* bug: fixed tests

* feat: reporting back individual node upgrades

* chore: fix some linting complaints

* chore: small fixes after reviewing the PR

* chore: implementing decode and encode in the jointoken type

* chore: using debugf instead of infof

we are using debugf in all other logrus calls except one.

* chore: add timeout to dnf commands and print test errors
  • Loading branch information
ricardomaraschini authored Oct 5, 2023
1 parent 4cf2934 commit ab69c30
Show file tree
Hide file tree
Showing 19 changed files with 716 additions and 48 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ TERRAFORM_VERSION = 1.5.4
OPENEBS_VERSION = 3.7.0
K0S_VERSION = v1.27.5+k0s.0
TROUBLESHOOT_VERSION = v0.72.0
LD_FLAGS = -X github.com/replicatedhq/helmvm/pkg/defaults.K0sVersion=$(K0S_VERSION) -X main.Version=$(VERSION)
LD_FLAGS = -X github.com/replicatedhq/helmvm/pkg/defaults.K0sVersion=$(K0S_VERSION) -X github.com/replicatedhq/helmvm/pkg/defaults.Version=$(VERSION)

default: helmvm-linux-amd64

Expand Down
28 changes: 22 additions & 6 deletions cmd/helmvm/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/replicatedhq/helmvm/pkg/defaults"
"github.com/replicatedhq/helmvm/pkg/goods"
"github.com/replicatedhq/helmvm/pkg/infra"
"github.com/replicatedhq/helmvm/pkg/metrics"
"github.com/replicatedhq/helmvm/pkg/preflights"
pb "github.com/replicatedhq/helmvm/pkg/progressbar"
"github.com/replicatedhq/helmvm/pkg/prompts"
Expand Down Expand Up @@ -396,33 +397,43 @@ var installCommand = &cli.Command{
},
},
Action: func(c *cli.Context) error {
metrics.ReportApplyStarted(c)
if defaults.DecentralizedInstall() {
fmt.Println("Decentralized install was detected. To manage the cluster")
fmt.Printf("you have to use the '%s node' commands instead.\n", defaults.BinaryName())
fmt.Printf("Run '%s node --help' for more information.\n", defaults.BinaryName())
metrics.ReportApplyFinished(c, fmt.Errorf("wrong upgrade on decentralized install"))
return fmt.Errorf("decentralized install detected")
}
useprompt := !c.Bool("no-prompt")
logrus.Infof("Materializing binaries")
if err := goods.Materialize(); err != nil {
return fmt.Errorf("unable to materialize binaries: %w", err)
err := fmt.Errorf("unable to materialize binaries: %w", err)
metrics.ReportApplyFinished(c, err)
return err
}
if !c.Bool("addons-only") {
var err error
var nodes []infra.Node
if dir := c.String("infra"); dir != "" {
logrus.Infof("Processing infrastructure manifests")
if nodes, err = infra.Apply(c.Context, dir, useprompt); err != nil {
return fmt.Errorf("unable to create infra: %w", err)
err := fmt.Errorf("unable to create infra: %w", err)
metrics.ReportApplyFinished(c, err)
return err
}
}
if err := applyK0sctl(c, useprompt, nodes); err != nil {
return fmt.Errorf("unable update cluster: %w", err)
err := fmt.Errorf("unable update cluster: %w", err)
metrics.ReportApplyFinished(c, err)
return err
}
}
logrus.Infof("Reading cluster access configuration")
if err := runK0sctlKubeconfig(c.Context); err != nil {
return fmt.Errorf("unable to get kubeconfig: %w", err)
err := fmt.Errorf("unable to get kubeconfig: %w", err)
metrics.ReportApplyFinished(c, err)
return err
}
logrus.Infof("Applying add-ons")
ccfg := defaults.PathToConfig("k0sctl.yaml")
Expand All @@ -436,16 +447,21 @@ var installCommand = &cli.Command{
opts = append(opts, addons.WithoutAddon(addon))
}
if err := addons.NewApplier(opts...).Apply(c.Context); err != nil {
return fmt.Errorf("unable to apply addons: %w", err)
err := fmt.Errorf("unable to apply addons: %w", err)
metrics.ReportApplyFinished(c, err)
return err
}
if err := runPostApply(c.Context); err != nil {
return fmt.Errorf("unable to run post apply: %w", err)
err := fmt.Errorf("unable to run post apply: %w", err)
metrics.ReportApplyFinished(c, err)
return err
}
fmt.Println("Cluster configuration has been applied")
fmt.Printf("Kubeconfig file has been placed at at %s\n", kcfg)
fmt.Printf("Cluster configuration file has been placed at %s\n", ccfg)
fmt.Println("You can now access your cluster with kubectl by running:")
fmt.Printf(" %s shell\n", os.Args[0])
metrics.ReportApplyFinished(c, nil)
return nil
},
}
66 changes: 43 additions & 23 deletions cmd/helmvm/join.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,49 +13,75 @@ import (

"github.com/replicatedhq/helmvm/pkg/defaults"
"github.com/replicatedhq/helmvm/pkg/goods"
"github.com/replicatedhq/helmvm/pkg/metrics"
)

var joinCommand = &cli.Command{
Name: "join",
Usage: "Join the current node to an existing cluster",
Flags: []cli.Flag{
&cli.StringFlag{
Name: "role",
Usage: "The role of the node (can be controller or worker)",
Value: "worker",
},
},
Action: func(c *cli.Context) error {
binname := defaults.BinaryName()
if c.Args().Len() != 1 {
return fmt.Errorf("usage: %s node join <token>", binname)
}
var hvmtoken JoinToken
if err := hvmtoken.Decode(c.Args().First()); err != nil {
return fmt.Errorf("unable to decode join token: %w", err)
}
metrics.ReportJoinStarted(c.Context, hvmtoken.ClusterID)
if err := canRunJoin(c); err != nil {
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
logrus.Infof("Materializing binaries")
if err := goods.Materialize(); err != nil {
return fmt.Errorf("unable to materialize binaries: %w", err)
err := fmt.Errorf("unable to materialize binaries: %w", err)
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
if err := runHostPreflightsLocally(c); err != nil {
return fmt.Errorf("unable to run host preflights locally: %w", err)
err := fmt.Errorf("unable to run host preflights locally: %w", err)
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
logrus.Infof("Saving token to disk")
if err := saveTokenToDisk(c.Args().First()); err != nil {
return fmt.Errorf("unable to save token to disk: %w", err)
if err := saveTokenToDisk(hvmtoken.Token); err != nil {
err := fmt.Errorf("unable to save token to disk: %w", err)
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
logrus.Infof("Installing binary")
if err := installK0sBinary(); err != nil {
return fmt.Errorf("unable to install k0s binary: %w", err)
err := fmt.Errorf("unable to install k0s binary: %w", err)
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
logrus.Infof("Joining node to cluster")
if err := runK0sInstallCommand(c.String("role")); err != nil {
return fmt.Errorf("unable to join node to cluster: %w", err)
if err := runK0sInstallCommand(hvmtoken.Role); err != nil {
err := fmt.Errorf("unable to join node to cluster: %w", err)
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
logrus.Infof("Creating systemd unit file")
if err := createSystemdUnitFile(c.String("role")); err != nil {
return fmt.Errorf("unable to create systemd unit file: %w", err)
if err := createSystemdUnitFile(hvmtoken.Role); err != nil {
err := fmt.Errorf("unable to create systemd unit file: %w", err)
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
logrus.Infof("Starting service")
if err := startK0sService(); err != nil {
return fmt.Errorf("unable to start service: %w", err)
err := fmt.Errorf("unable to start service: %w", err)
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
fpath := defaults.PathToConfig(".cluster-id")
cid := hvmtoken.ClusterID.String()
if err := os.WriteFile(fpath, []byte(cid), 0644); err != nil {
err := fmt.Errorf("unable to write cluster id to disk: %w", err)
metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err)
return err
}
metrics.ReportJoinSucceeded(c.Context, hvmtoken.ClusterID)
return nil
},
}
Expand Down Expand Up @@ -115,12 +141,6 @@ func canRunJoin(c *cli.Context) error {
if os.Getuid() != 0 {
return fmt.Errorf("join command must be run as root")
}
if c.Args().Len() != 1 {
return fmt.Errorf("usage: %s node join <token>", defaults.BinaryName())
}
if role := c.String("role"); role != "controller" && role != "worker" {
return fmt.Errorf("role must be either controller or worker")
}
return nil
}

Expand Down
39 changes: 37 additions & 2 deletions cmd/helmvm/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,21 @@ package main

import (
"bytes"
"encoding/base64"
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"runtime"
"time"

"github.com/google/uuid"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"

"github.com/replicatedhq/helmvm/pkg/defaults"
"github.com/replicatedhq/helmvm/pkg/metrics"
"github.com/replicatedhq/helmvm/pkg/prompts"
)

Expand All @@ -22,6 +26,32 @@ var tokenCommands = &cli.Command{
Subcommands: []*cli.Command{tokenCreateCommand},
}

// JoinToken is a struct that holds the actual token, the cluster id and the role of the node.
// It is marshaled to JSON and base64 encoded (see Encode) and the resulting string is used as
// the argument to the join command in the other nodes, where it is read back with Decode.
//
// NOTE: the token create command builds this struct with a positional composite literal, so
// the order of the fields below must not be changed.
type JoinToken struct {
// ClusterID is the id of the cluster the node is joining. Serialized as "clusterID".
ClusterID uuid.UUID `json:"clusterID"`
// Token is the actual join token. Serialized as "token".
Token string `json:"token"`
// Role is the role the node takes when joining the cluster. Serialized as "role".
Role string `json:"role"`
}

// Decode parses b64, a standard base64 encoded JSON document, and stores the
// resulting values in the receiver. It returns the base64 decoding error if the
// input is not valid base64, or the JSON unmarshaling error otherwise.
func (j *JoinToken) Decode(b64 string) error {
	raw, err := base64.StdEncoding.DecodeString(b64)
	if err == nil {
		// only attempt to parse the payload once we know it has been decoded.
		err = json.Unmarshal(raw, j)
	}
	return err
}

// Encode serializes the JoinToken as JSON and returns that document encoded
// with the standard base64 encoding. If the JSON serialization fails an empty
// string is returned together with the error.
func (j *JoinToken) Encode() (string, error) {
	payload, err := json.Marshal(j)
	if err == nil {
		return base64.StdEncoding.EncodeToString(payload), nil
	}
	return "", err
}

var tokenCreateCommand = &cli.Command{
Name: "create",
Usage: "Creates a new node join token",
Expand Down Expand Up @@ -83,14 +113,19 @@ var tokenCreateCommand = &cli.Command{
}
if !defaults.DecentralizedInstall() {
if err := defaults.SetInstallAsDecentralized(); err != nil {
return fmt.Errorf("failed to set decentralized install: %w", err)
return fmt.Errorf("unable to set decentralized install: %w", err)
}
}
token := JoinToken{metrics.ClusterID(), buf.String(), role}
b64token, err := token.Encode()
if err != nil {
return fmt.Errorf("unable to encode token: %w", err)
}
fmt.Println("Token created successfully.")
fmt.Printf("This token is valid for %s hours.\n", dur)
fmt.Println("You can now run the following command in a remote node to add it")
fmt.Printf("to the cluster as a %q node:\n", role)
fmt.Printf("%s node join --role %s %s", defaults.BinaryName(), role, buf.String())
fmt.Printf("%s node join %s\n", defaults.BinaryName(), b64token)
return nil
},
}
33 changes: 26 additions & 7 deletions cmd/helmvm/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/replicatedhq/helmvm/pkg/addons"
"github.com/replicatedhq/helmvm/pkg/defaults"
"github.com/replicatedhq/helmvm/pkg/goods"
"github.com/replicatedhq/helmvm/pkg/metrics"
"github.com/replicatedhq/helmvm/pkg/preflights"
"github.com/replicatedhq/helmvm/pkg/prompts"
)
Expand Down Expand Up @@ -103,34 +104,49 @@ var upgradeCommand = &cli.Command{
},
},
Action: func(c *cli.Context) error {
metrics.ReportNodeUpgradeStarted(c.Context)
if err := canRunUpgrade(c); err != nil {
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
logrus.Infof("Materializing binaries")
if err := goods.Materialize(); err != nil {
return fmt.Errorf("unable to materialize binaries: %w", err)
err := fmt.Errorf("unable to materialize binaries: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
if err := runHostPreflightsLocally(c); err != nil {
return fmt.Errorf("unable to run host preflights locally: %w", err)
err := fmt.Errorf("unable to run host preflights locally: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
logrus.Infof("Stopping %s", defaults.BinaryName())
if err := stopHelmVM(); err != nil {
return fmt.Errorf("unable to stop: %w", err)
err := fmt.Errorf("unable to stop: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
logrus.Infof("Installing binary")
if err := installK0sBinary(); err != nil {
return fmt.Errorf("unable to install k0s binary: %w", err)
err := fmt.Errorf("unable to install k0s binary: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
logrus.Infof("Starting service")
if err := startK0sService(); err != nil {
return fmt.Errorf("unable to start service: %w", err)
err := fmt.Errorf("unable to start service: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
kcfg := defaults.PathToConfig("kubeconfig")
if _, err := os.Stat(kcfg); err != nil {
if os.IsNotExist(err) {
metrics.ReportNodeUpgradeSucceeded(c.Context)
return nil
}
return fmt.Errorf("unable to stat kubeconfig: %w", err)
err := fmt.Errorf("unable to stat kubeconfig: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
os.Setenv("KUBECONFIG", kcfg)
logrus.Infof("Upgrading addons")
Expand All @@ -142,8 +158,11 @@ var upgradeCommand = &cli.Command{
opts = append(opts, addons.WithoutAddon(addon))
}
if err := addons.NewApplier(opts...).Apply(c.Context); err != nil {
return fmt.Errorf("unable to apply addons: %w", err)
err := fmt.Errorf("unable to apply addons: %w", err)
metrics.ReportNodeUpgradeFailed(c.Context, err)
return err
}
metrics.ReportNodeUpgradeSucceeded(c.Context)
logrus.Infof("Upgrade complete")
return nil
},
Expand Down
4 changes: 1 addition & 3 deletions cmd/helmvm/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ import (
"github.com/replicatedhq/helmvm/pkg/defaults"
)

var Version = "v0.0.0"

var versionCommand = &cli.Command{
Name: "version",
Usage: fmt.Sprintf("Shows the %s installer version", defaults.BinaryName()),
Expand All @@ -24,7 +22,7 @@ var versionCommand = &cli.Command{
}
writer := table.NewWriter()
writer.AppendHeader(table.Row{"component", "version"})
writer.AppendRow(table.Row{"Installer", Version})
writer.AppendRow(table.Row{"Installer", defaults.Version})
writer.AppendRow(table.Row{"Kubernetes", defaults.K0sVersion})
for name, version := range versions {
if !strings.HasPrefix(version, "v") {
Expand Down
2 changes: 1 addition & 1 deletion e2e/install_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ func TestHostPreflight(t *testing.T) {
defer tc.Destroy()
t.Log("installing ssh and binutils on node 0")
commands := [][]string{
{"dnf", "install", "-y", "openssh-server", "binutils", "tar"},
{"dnf", "--setopt=metadata_expire=120", "install", "-y", "openssh-server", "binutils", "tar"},
{"systemctl", "enable", "sshd"},
{"systemctl", "start", "sshd"},
}
Expand Down
7 changes: 6 additions & 1 deletion e2e/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,22 @@ func (b *buffer) Close() error {
return nil
}

// RunCommandsOnNode runs a series of commands on a node.
func RunCommandsOnNode(t *testing.T, cl *cluster.Output, node int, cmds [][]string) error {
for _, cmd := range cmds {
cmdstr := strings.Join(cmd, " ")
t.Logf("running `%s` node %d", cmdstr, node)
if _, _, err := RunCommandOnNode(t, cl, node, cmd); err != nil {
stdout, stderr, err := RunCommandOnNode(t, cl, node, cmd)
if err != nil {
t.Logf("stdout:\n%s", stdout)
t.Logf("stderr:\n%s", stderr)
return err
}
}
return nil
}

// RunCommandOnNode runs a command on a node with a timeout.
func RunCommandOnNode(t *testing.T, cl *cluster.Output, node int, line []string) (string, string, error) {
stdout := &buffer{bytes.NewBuffer(nil)}
stderr := &buffer{bytes.NewBuffer(nil)}
Expand Down
Loading

0 comments on commit ab69c30

Please sign in to comment.