From ab69c306584f85b11c0c0690dd6a7a2d3a3186d7 Mon Sep 17 00:00:00 2001 From: Ricardo Maraschini Date: Thu, 5 Oct 2023 15:59:35 +0200 Subject: [PATCH] feat: reporting back installation and upgrade metrics (#92) * feat: reporting installation and upgrade metrics reporting back installation and upgrades data. * chore: renamed event properties * chore: small fixes * bug: fixed tests * feat: reporting back individual node upgrades * chore: fix some linting complains * chore: small fixed after reviewing the pr * chore: implementing decode and encode in the jointoken type * chore: using debugf instead of infof we are using debugf in all other logrus calls except one. * chore: add timeout to dnf commands and print test errors --- Makefile | 2 +- cmd/helmvm/install.go | 28 +++- cmd/helmvm/join.go | 66 +++++--- cmd/helmvm/token.go | 39 ++++- cmd/helmvm/upgrade.go | 33 +++- cmd/helmvm/version.go | 4 +- e2e/install_test.go | 2 +- e2e/utils.go | 7 +- pkg/addons/adminconsole/adminconsole.go | 2 +- pkg/addons/adminconsole/customize.go | 2 +- pkg/addons/applier.go | 2 +- pkg/defaults/defaults.go | 8 + pkg/defaults/provider.go | 8 + pkg/defaults/provider_test.go | 12 ++ pkg/metrics/events.go | 129 +++++++++++++++ pkg/metrics/reporter.go | 206 ++++++++++++++++++++++++ pkg/metrics/sender.go | 70 ++++++++ pkg/metrics/sender_test.go | 142 ++++++++++++++++ pkg/progressbar/progressbar.go | 2 +- 19 files changed, 716 insertions(+), 48 deletions(-) create mode 100644 pkg/metrics/events.go create mode 100644 pkg/metrics/reporter.go create mode 100644 pkg/metrics/sender.go create mode 100644 pkg/metrics/sender_test.go diff --git a/Makefile b/Makefile index d71488f5c..14adb938c 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ TERRAFORM_VERSION = 1.5.4 OPENEBS_VERSION = 3.7.0 K0S_VERSION = v1.27.5+k0s.0 TROUBLESHOOT_VERSION = v0.72.0 -LD_FLAGS = -X github.com/replicatedhq/helmvm/pkg/defaults.K0sVersion=$(K0S_VERSION) -X main.Version=$(VERSION) +LD_FLAGS = -X github.com/replicatedhq/helmvm/pkg/defaults.K0sVersion=$(K0S_VERSION) -X github.com/replicatedhq/helmvm/pkg/defaults.Version=$(VERSION) default: helmvm-linux-amd64 diff --git a/cmd/helmvm/install.go b/cmd/helmvm/install.go index ea19d9942..47ba2256e 100644 --- a/cmd/helmvm/install.go +++ b/cmd/helmvm/install.go @@ -24,6 +24,7 @@ import ( "github.com/replicatedhq/helmvm/pkg/defaults" "github.com/replicatedhq/helmvm/pkg/goods" "github.com/replicatedhq/helmvm/pkg/infra" + "github.com/replicatedhq/helmvm/pkg/metrics" "github.com/replicatedhq/helmvm/pkg/preflights" pb "github.com/replicatedhq/helmvm/pkg/progressbar" "github.com/replicatedhq/helmvm/pkg/prompts" @@ -396,16 +397,20 @@ var installCommand = &cli.Command{ }, }, Action: func(c *cli.Context) error { + metrics.ReportApplyStarted(c) if defaults.DecentralizedInstall() { fmt.Println("Decentralized install was detected. To manage the cluster") fmt.Printf("you have to use the '%s node' commands instead.\n", defaults.BinaryName()) fmt.Printf("Run '%s node --help' for more information.\n", defaults.BinaryName()) + metrics.ReportApplyFinished(c, fmt.Errorf("wrong upgrade on decentralized install")) return fmt.Errorf("decentralized install detected") } useprompt := !c.Bool("no-prompt") logrus.Infof("Materializing binaries") if err := goods.Materialize(); err != nil { - return fmt.Errorf("unable to materialize binaries: %w", err) + err := fmt.Errorf("unable to materialize binaries: %w", err) + metrics.ReportApplyFinished(c, err) + return err } if !c.Bool("addons-only") { var err error @@ -413,16 +418,22 @@ var installCommand = &cli.Command{ if dir := c.String("infra"); dir != "" { logrus.Infof("Processing infrastructure manifests") if nodes, err = infra.Apply(c.Context, dir, useprompt); err != nil { - return fmt.Errorf("unable to create infra: %w", err) + err := fmt.Errorf("unable to create infra: %w", err) + metrics.ReportApplyFinished(c, err) + return err } } if err := applyK0sctl(c, useprompt, nodes); err != nil { - return fmt.Errorf("unable update cluster: %w", err) + err := fmt.Errorf("unable update cluster: %w", err) + metrics.ReportApplyFinished(c, err) + return err } } logrus.Infof("Reading cluster access configuration") if err := runK0sctlKubeconfig(c.Context); err != nil { - return fmt.Errorf("unable to get kubeconfig: %w", err) + err := fmt.Errorf("unable to get kubeconfig: %w", err) + metrics.ReportApplyFinished(c, err) + return err } logrus.Infof("Applying add-ons") ccfg := defaults.PathToConfig("k0sctl.yaml") @@ -436,16 +447,21 @@ var installCommand = &cli.Command{ opts = append(opts, addons.WithoutAddon(addon)) } if err := addons.NewApplier(opts...).Apply(c.Context); err != nil { - return fmt.Errorf("unable to apply addons: %w", err) + err := fmt.Errorf("unable to apply addons: %w", err) + metrics.ReportApplyFinished(c, err) + return err } if err := runPostApply(c.Context); err != nil { - return fmt.Errorf("unable to run post apply: %w", err) + err := fmt.Errorf("unable to run post apply: %w", err) + metrics.ReportApplyFinished(c, err) + return err } fmt.Println("Cluster configuration has been applied") fmt.Printf("Kubeconfig file has been placed at at %s\n", kcfg) fmt.Printf("Cluster configuration file has been placed at %s\n", ccfg) fmt.Println("You can now access your cluster with kubectl by running:") fmt.Printf(" %s shell\n", os.Args[0]) + metrics.ReportApplyFinished(c, nil) return nil }, } diff --git a/cmd/helmvm/join.go b/cmd/helmvm/join.go index 3b55c26e6..81fb51482 100644 --- a/cmd/helmvm/join.go +++ b/cmd/helmvm/join.go @@ -13,49 +13,75 @@ import ( "github.com/replicatedhq/helmvm/pkg/defaults" "github.com/replicatedhq/helmvm/pkg/goods" + "github.com/replicatedhq/helmvm/pkg/metrics" ) var joinCommand = &cli.Command{ Name: "join", Usage: "Join the current node to an existing cluster", - Flags: []cli.Flag{ - &cli.StringFlag{ - Name: "role", - Usage: "The role of the node (can be controller or worker)", - Value: "worker", - }, - }, Action: func(c *cli.Context) error { + binname := defaults.BinaryName() + if c.Args().Len() != 1 { + return fmt.Errorf("usage: %s node join ", binname) + } + var hvmtoken JoinToken + if err := hvmtoken.Decode(c.Args().First()); err != nil { + return fmt.Errorf("unable to decode join token: %w", err) + } + metrics.ReportJoinStarted(c.Context, hvmtoken.ClusterID) if err := canRunJoin(c); err != nil { + metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) return err } logrus.Infof("Materializing binaries") if err := goods.Materialize(); err != nil { - return fmt.Errorf("unable to materialize binaries: %w", err) + err := fmt.Errorf("unable to materialize binaries: %w", err) + metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) + return err } if err := runHostPreflightsLocally(c); err != nil { - return fmt.Errorf("unable to run host preflights locally: %w", err) + err := fmt.Errorf("unable to run host preflights locally: %w", err) + metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) + return err } logrus.Infof("Saving token to disk") - if err := saveTokenToDisk(c.Args().First()); err != nil { - return fmt.Errorf("unable to save token to disk: %w", err) + if err := saveTokenToDisk(hvmtoken.Token); err != nil { + err := fmt.Errorf("unable to save token to disk: %w", err) + metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) + return err } logrus.Infof("Installing binary") if err := installK0sBinary(); err != nil { - return fmt.Errorf("unable to install k0s binary: %w", err) + err := fmt.Errorf("unable to install k0s binary: %w", err) + metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) + return err } logrus.Infof("Joining node to cluster") - if err := runK0sInstallCommand(c.String("role")); err != nil { - return fmt.Errorf("unable to join node to cluster: %w", err) + if err := runK0sInstallCommand(hvmtoken.Role); err != nil { + err := fmt.Errorf("unable to join node to cluster: %w", err) + metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) + return err } logrus.Infof("Creating systemd unit file") - if err := createSystemdUnitFile(c.String("role")); err != nil { - return fmt.Errorf("unable to create systemd unit file: %w", err) + if err := createSystemdUnitFile(hvmtoken.Role); err != nil { + err := fmt.Errorf("unable to create systemd unit file: %w", err) + metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) + return err } logrus.Infof("Starting service") if err := startK0sService(); err != nil { - return fmt.Errorf("unable to start service: %w", err) + err := fmt.Errorf("unable to start service: %w", err) + metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) + return err } + fpath := defaults.PathToConfig(".cluster-id") + cid := hvmtoken.ClusterID.String() + if err := os.WriteFile(fpath, []byte(cid), 0644); err != nil { + err := fmt.Errorf("unable to write cluster id to disk: %w", err) + metrics.ReportJoinFailed(c.Context, hvmtoken.ClusterID, err) + return err + } + metrics.ReportJoinSucceeded(c.Context, hvmtoken.ClusterID) return nil }, } @@ -115,12 +141,6 @@ func canRunJoin(c *cli.Context) error { if os.Getuid() != 0 { return fmt.Errorf("join command must be run as root") } - if c.Args().Len() != 1 { - return fmt.Errorf("usage: %s node join ", defaults.BinaryName()) - } - if role := c.String("role"); role != "controller" && role != "worker" { - return fmt.Errorf("role must be either controller or worker") - } return nil } diff --git a/cmd/helmvm/token.go b/cmd/helmvm/token.go index 16aa094c2..4b604f741 100644 --- a/cmd/helmvm/token.go +++ b/cmd/helmvm/token.go @@ -2,6 +2,8 @@ package main import ( "bytes" + "encoding/base64" + "encoding/json" "fmt" "io" "os" @@ -9,10 +11,12 @@ import ( "runtime" "time" + "github.com/google/uuid" "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" "github.com/replicatedhq/helmvm/pkg/defaults" + "github.com/replicatedhq/helmvm/pkg/metrics" "github.com/replicatedhq/helmvm/pkg/prompts" ) @@ -22,6 +26,32 @@ var tokenCommands = &cli.Command{ Subcommands: []*cli.Command{tokenCreateCommand}, } +// JoinToken is a struct that holds both the actual token and the cluster id. This is marshaled +// and base64 encoded and used as argument to the join command in the other nodes. +type JoinToken struct { + ClusterID uuid.UUID `json:"clusterID"` + Token string `json:"token"` + Role string `json:"role"` +} + +// Decode decodes a base64 encoded JoinToken. +func (j *JoinToken) Decode(b64 string) error { + decoded, err := base64.StdEncoding.DecodeString(b64) + if err != nil { + return err + } + return json.Unmarshal(decoded, j) +} + +// Encode encodes a JoinToken to base64. +func (j *JoinToken) Encode() (string, error) { + b, err := json.Marshal(j) + if err != nil { + return "", err + } + return base64.StdEncoding.EncodeToString(b), nil +} + var tokenCreateCommand = &cli.Command{ Name: "create", Usage: "Creates a new node join token", @@ -83,14 +113,19 @@ var tokenCreateCommand = &cli.Command{ } if !defaults.DecentralizedInstall() { if err := defaults.SetInstallAsDecentralized(); err != nil { - return fmt.Errorf("failed to set decentralized install: %w", err) + return fmt.Errorf("unable to set decentralized install: %w", err) } } + token := JoinToken{metrics.ClusterID(), buf.String(), role} + b64token, err := token.Encode() + if err != nil { + return fmt.Errorf("unable to encode token: %w", err) + } fmt.Println("Token created successfully.") fmt.Printf("This token is valid for %s hours.\n", dur) fmt.Println("You can now run the following command in a remote node to add it") fmt.Printf("to the cluster as a %q node:\n", role) - fmt.Printf("%s node join --role %s %s", defaults.BinaryName(), role, buf.String()) + fmt.Printf("%s node join %s\n", defaults.BinaryName(), b64token) return nil }, } diff --git a/cmd/helmvm/upgrade.go b/cmd/helmvm/upgrade.go index a137ad0d9..566ac684c 100644 --- a/cmd/helmvm/upgrade.go +++ b/cmd/helmvm/upgrade.go @@ -13,6 +13,7 @@ import ( "github.com/replicatedhq/helmvm/pkg/addons" "github.com/replicatedhq/helmvm/pkg/defaults" "github.com/replicatedhq/helmvm/pkg/goods" + "github.com/replicatedhq/helmvm/pkg/metrics" "github.com/replicatedhq/helmvm/pkg/preflights" "github.com/replicatedhq/helmvm/pkg/prompts" ) @@ -103,34 +104,49 @@ var upgradeCommand = &cli.Command{ }, }, Action: func(c *cli.Context) error { + metrics.ReportNodeUpgradeStarted(c.Context) if err := canRunUpgrade(c); err != nil { + metrics.ReportNodeUpgradeFailed(c.Context, err) return err } logrus.Infof("Materializing binaries") if err := goods.Materialize(); err != nil { - return fmt.Errorf("unable to materialize binaries: %w", err) + err := fmt.Errorf("unable to materialize binaries: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } if err := runHostPreflightsLocally(c); err != nil { - return fmt.Errorf("unable to run host preflights locally: %w", err) + err := fmt.Errorf("unable to run host preflights locally: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } logrus.Infof("Stopping %s", defaults.BinaryName()) if err := stopHelmVM(); err != nil { - return fmt.Errorf("unable to stop: %w", err) + err := fmt.Errorf("unable to stop: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } logrus.Infof("Installing binary") if err := installK0sBinary(); err != nil { - return fmt.Errorf("unable to install k0s binary: %w", err) + err := fmt.Errorf("unable to install k0s binary: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } logrus.Infof("Starting service") if err := startK0sService(); err != nil { - return fmt.Errorf("unable to start service: %w", err) + err := fmt.Errorf("unable to start service: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } kcfg := defaults.PathToConfig("kubeconfig") if _, err := os.Stat(kcfg); err != nil { if os.IsNotExist(err) { + metrics.ReportNodeUpgradeSucceeded(c.Context) return nil } - return fmt.Errorf("unable to stat kubeconfig: %w", err) + err := fmt.Errorf("unable to stat kubeconfig: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } os.Setenv("KUBECONFIG", kcfg) logrus.Infof("Upgrading addons") @@ -142,8 +158,11 @@ var upgradeCommand = &cli.Command{ opts = append(opts, addons.WithoutAddon(addon)) } if err := addons.NewApplier(opts...).Apply(c.Context); err != nil { - return fmt.Errorf("unable to apply addons: %w", err) + err := fmt.Errorf("unable to apply addons: %w", err) + metrics.ReportNodeUpgradeFailed(c.Context, err) + return err } + metrics.ReportNodeUpgradeSucceeded(c.Context) logrus.Infof("Upgrade complete") return nil }, diff --git a/cmd/helmvm/version.go b/cmd/helmvm/version.go index 85c5c5fba..8733591e4 100644 --- a/cmd/helmvm/version.go +++ b/cmd/helmvm/version.go @@ -11,8 +11,6 @@ import ( "github.com/replicatedhq/helmvm/pkg/defaults" ) -var Version = "v0.0.0" - var versionCommand = &cli.Command{ Name: "version", Usage: fmt.Sprintf("Shows the %s installer version", defaults.BinaryName()), @@ -24,7 +22,7 @@ var versionCommand = &cli.Command{ } writer := table.NewWriter() writer.AppendHeader(table.Row{"component", "version"}) - writer.AppendRow(table.Row{"Installer", Version}) + writer.AppendRow(table.Row{"Installer", defaults.Version}) writer.AppendRow(table.Row{"Kubernetes", defaults.K0sVersion}) for name, version := range versions { if !strings.HasPrefix(version, "v") { diff --git a/e2e/install_test.go b/e2e/install_test.go index 5f08c8115..490bc0120 100644 --- a/e2e/install_test.go +++ b/e2e/install_test.go @@ -283,7 +283,7 @@ func TestHostPreflight(t *testing.T) { defer tc.Destroy() t.Log("installing ssh and binutils on node 0") commands := [][]string{ - {"dnf", "install", "-y", "openssh-server", "binutils", "tar"}, + {"dnf", "--setopt=metadata_expire=120", "install", "-y", "openssh-server", "binutils", "tar"}, {"systemctl", "enable", "sshd"}, {"systemctl", "start", "sshd"}, } diff --git a/e2e/utils.go b/e2e/utils.go index 1462cadeb..d12b01898 100644 --- a/e2e/utils.go +++ b/e2e/utils.go @@ -18,17 +18,22 @@ func (b *buffer) Close() error { return nil } +// RunCommandsOnNode runs a series of commands on a node. func RunCommandsOnNode(t *testing.T, cl *cluster.Output, node int, cmds [][]string) error { for _, cmd := range cmds { cmdstr := strings.Join(cmd, " ") t.Logf("running `%s` node %d", cmdstr, node) - if _, _, err := RunCommandOnNode(t, cl, node, cmd); err != nil { + stdout, stderr, err := RunCommandOnNode(t, cl, node, cmd) + if err != nil { + t.Logf("stdout:\n%s", stdout) + t.Logf("stderr:\n%s", stderr) return err } } return nil } +// RunCommandOnNode runs a command on a node with a timeout. func RunCommandOnNode(t *testing.T, cl *cluster.Output, node int, line []string) (string, string, error) { stdout := &buffer{bytes.NewBuffer(nil)} stderr := &buffer{bytes.NewBuffer(nil)} diff --git a/pkg/addons/adminconsole/adminconsole.go b/pkg/addons/adminconsole/adminconsole.go index 107f9a62d..ba0302367 100644 --- a/pkg/addons/adminconsole/adminconsole.go +++ b/pkg/addons/adminconsole/adminconsole.go @@ -66,7 +66,7 @@ func (a *AdminConsole) HostPreflights() (*v1beta2.HostPreflightSpec, error) { // addLicenseToHelmValues adds the embedded license to the helm values. func (a *AdminConsole) addLicenseToHelmValues() error { - license, err := a.customization.license() + license, err := a.customization.License() if err != nil { return fmt.Errorf("unable to get license: %w", err) } diff --git a/pkg/addons/adminconsole/customize.go b/pkg/addons/adminconsole/customize.go index e76502092..1c22349f3 100644 --- a/pkg/addons/adminconsole/customize.go +++ b/pkg/addons/adminconsole/customize.go @@ -196,7 +196,7 @@ func (a *AdminConsoleCustomization) hostPreflights() (*v1beta2.HostPreflightSpec // license reads the kots license from the embedded Kots Application Release. If no license is found, // returns nil and no error. -func (a *AdminConsoleCustomization) license() (*v1beta1.License, error) { +func (a *AdminConsoleCustomization) License() (*v1beta1.License, error) { if runtime.GOOS != "linux" { return nil, nil } diff --git a/pkg/addons/applier.go b/pkg/addons/applier.go index ec629a0ea..4d72360c7 100644 --- a/pkg/addons/applier.go +++ b/pkg/addons/applier.go @@ -110,7 +110,7 @@ func (a *Applier) load() (map[string]AddOn, error) { return addons, nil } -// Version returns a map with the version of each addon that will be applied. +// Versions returns a map with the version of each addon that will be applied. func (a *Applier) Versions() (map[string]string, error) { addons, err := a.load() if err != nil { diff --git a/pkg/defaults/defaults.go b/pkg/defaults/defaults.go index 1e7cf95c7..c4857cb79 100644 --- a/pkg/defaults/defaults.go +++ b/pkg/defaults/defaults.go @@ -4,6 +4,8 @@ package defaults var ( + // Version holds the HelmVM version. + Version = "v0.0.0" // K0sVersion holds the version of k0s binary we are embedding. this is // set at compile time via ldflags. K0sVersion = "0.0.0" @@ -104,3 +106,9 @@ func DecentralizedInstall() bool { func SetInstallAsDecentralized() error { return def().SetInstallAsDecentralized() } + +// IsUpgrade determines if we are upgrading a cluster judging by the existence +// or not of a kubeconfig file in the configuration directory. +func IsUpgrade() bool { + return def().IsUpgrade() +} diff --git a/pkg/defaults/provider.go b/pkg/defaults/provider.go index 4e544d2e1..f3ea82273 100644 --- a/pkg/defaults/provider.go +++ b/pkg/defaults/provider.go @@ -198,3 +198,11 @@ func (d *DefaultsProvider) SetInstallAsDecentralized() error { defer fp.Close() return nil } + +// IsUpgrade determines if we are upgrading a cluster judging by the existence +// or not of a kubeconfig file in the configuration directory. +func (d *DefaultsProvider) IsUpgrade() bool { + fpath := d.PathToConfig("kubeconfig") + _, err := os.Stat(fpath) + return err == nil +} diff --git a/pkg/defaults/provider_test.go b/pkg/defaults/provider_test.go index 9e18fceb7..9b93022ee 100644 --- a/pkg/defaults/provider_test.go +++ b/pkg/defaults/provider_test.go @@ -8,6 +8,18 @@ import ( "github.com/stretchr/testify/assert" ) +func TestIsUpgrade(t *testing.T) { + tmpdir, err := os.MkdirTemp("", "helmvm") + assert.NoError(t, err) + defer os.RemoveAll(tmpdir) + provider := NewProvider(tmpdir) + assert.False(t, provider.IsUpgrade(), "default should be not upgrade") + dstfile := provider.PathToConfig("kubeconfig") + err = os.WriteFile(dstfile, []byte("test"), 0600) + assert.NoError(t, err) + assert.True(t, provider.IsUpgrade(), "should be upgrade") +} + func TestInit(t *testing.T) { tmpdir, err := os.MkdirTemp("", "helmvm") assert.NoError(t, err) diff --git a/pkg/metrics/events.go b/pkg/metrics/events.go new file mode 100644 index 000000000..c1da4e48b --- /dev/null +++ b/pkg/metrics/events.go @@ -0,0 +1,129 @@ +package metrics + +import ( + "github.com/google/uuid" +) + +// Event is implemented by all events. Title returns a string that identifies the +// event type. +type Event interface { + Title() string +} + +// InstallationStarted event is send back home when the installation starts. +type InstallationStarted struct { + ClusterID uuid.UUID `json:"clusterID"` + Version string `json:"version"` + Flags string `json:"flags"` + BinaryName string `json:"binaryName"` + Type string `json:"type"` + LicenseID string `json:"licenseID"` +} + +// Title returns the name of the event. +func (e InstallationStarted) Title() string { + return "InstallationStarted" +} + +// InstallationSucceeded event is send back home when the installation finishes. +type InstallationSucceeded struct { + ClusterID uuid.UUID `json:"clusterID"` +} + +// Title returns the name of the event. +func (e InstallationSucceeded) Title() string { + return "InstallationSucceeded" +} + +// InstallationFailed event is send back home when the installation fails. +type InstallationFailed struct { + ClusterID uuid.UUID `json:"clusterID"` + Reason string `json:"reason"` +} + +// Title returns the name of the event. +func (e InstallationFailed) Title() string { + return "InstallationFailed" +} + +// UpgradeStarted event is send back home when the upgrade starts. +type UpgradeStarted InstallationStarted + +// Title returns the name of the event. +func (e UpgradeStarted) Title() string { + return "UpgradeStarted" +} + +// UpgradeSucceeded event is send back home when the upgrade finishes. +type UpgradeSucceeded InstallationSucceeded + +// Title returns the name of the event. +func (e UpgradeSucceeded) Title() string { + return "UpgradeSucceeded" +} + +// UpgradeFailed event is send back home when the upgrade fails. +type UpgradeFailed InstallationFailed + +// Title returns the name of the event. +func (e UpgradeFailed) Title() string { + return "UpgradeFailed" +} + +// JoinStarted event is send back home when a node join starts. +type JoinStarted struct { + ClusterID uuid.UUID `json:"clusterID"` + NodeName string `json:"nodeName"` +} + +// Title returns the name of the event. +func (e JoinStarted) Title() string { + return "JoinStarted" +} + +// JoinSucceeded event is send back home when a node join succeeds. +type JoinSucceeded JoinStarted + +// Title returns the name of the event. +func (e JoinSucceeded) Title() string { + return "JoinSucceeded" +} + +// JoinFailed event is send back home when a node join fails. +type JoinFailed struct { + ClusterID uuid.UUID `json:"clusterID"` + NodeName string `json:"nodeName"` + Reason string `json:"reason"` +} + +// Title returns the name of the event. +func (e JoinFailed) Title() string { + return "JoinFailed" +} + +// NodeUpgradeStarted event is send back home when a node upgrade +// starts. +type NodeUpgradeStarted JoinStarted + +// Title returns the name of the event. +func (e NodeUpgradeStarted) Title() string { + return "NodeUpgradeStarted" +} + +// NodeUpgradeSucceeded event is send back home when a node upgrade +// succeeds. +type NodeUpgradeSucceeded NodeUpgradeStarted + +// Title returns the name of the event. +func (e NodeUpgradeSucceeded) Title() string { + return "NodeUpgradeSucceeded" +} + +// NodeUpgradeFailed event is send back home when a node upgrade +// fails. +type NodeUpgradeFailed JoinFailed + +// Title returns the name of the event. +func (e NodeUpgradeFailed) Title() string { + return "NodeUpgradeFailed" +} diff --git a/pkg/metrics/reporter.go b/pkg/metrics/reporter.go new file mode 100644 index 000000000..7795dbcdd --- /dev/null +++ b/pkg/metrics/reporter.go @@ -0,0 +1,206 @@ +package metrics + +import ( + "context" + "os" + "strings" + "time" + + "github.com/google/uuid" + "github.com/sirupsen/logrus" + "github.com/urfave/cli/v2" + + "github.com/replicatedhq/helmvm/pkg/addons/adminconsole" + "github.com/replicatedhq/helmvm/pkg/defaults" +) + +// isUpgrade holds globally if we are upgrading a cluster or installing a new one. +// This is used to decide which events to send and is determined only once at the +// beginning of the execution (see init()). +var isUpgrade bool + +func init() { + isUpgrade = defaults.IsUpgrade() +} + +// LicenseID returns the embedded license id. If something goes wrong, it returns +// an empty string. +func LicenseID() string { + var custom adminconsole.AdminConsoleCustomization + if license, err := custom.License(); err == nil && license != nil { + return license.Spec.LicenseID + } + return "" +} + +// ClusterID returns the cluster id. It is read from from a local file (if this is +// a second attempt at installation or an upgrade) or a new one is generated and +// stored locally. TODO: this should be persisted in the cluster as part of a CRD +// managed by our operator, as we don't have an operator yet, we are storing it +// locally only. +func ClusterID() uuid.UUID { + fpath := defaults.PathToConfig(".cluster-id") + if _, err := os.Stat(fpath); err == nil { + data, err := os.ReadFile(fpath) + if err != nil { + logrus.Warnf("unable to read cluster id from %s: %s", fpath, err) + return uuid.New() + } + id, err := uuid.Parse(string(data)) + if err != nil { + logrus.Warnf("unable to parse cluster id from %s: %s", fpath, err) + return uuid.New() + } + return id + } + id := uuid.New() + if err := os.WriteFile(fpath, []byte(id.String()), 0644); err != nil { + logrus.Warnf("unable to write cluster id to %s: %s", fpath, err) + } + return id +} + +// ReportInstallationStarted reports that the installation has started. +func ReportInstallationStarted(ctx context.Context) { + Send(ctx, InstallationStarted{ + ClusterID: ClusterID(), + Version: defaults.Version, + Flags: strings.Join(os.Args[1:], " "), + BinaryName: defaults.BinaryName(), + Type: "centralized", + LicenseID: LicenseID(), + }) +} + +// ReportInstallationSucceeded reports that the installation has succeeded. +func ReportInstallationSucceeded(ctx context.Context) { + Send(ctx, InstallationSucceeded{ClusterID: ClusterID()}) +} + +// ReportInstallationFailed reports that the installation has failed. +func ReportInstallationFailed(ctx context.Context, err error) { + Send(ctx, InstallationFailed{ClusterID(), err.Error()}) +} + +// ReportUpgradeStarted reports that the upgrade has started. +func ReportUpgradeStarted(ctx context.Context) { + Send(ctx, UpgradeStarted{ + ClusterID: ClusterID(), + Version: defaults.Version, + Flags: strings.Join(os.Args[1:], " "), + BinaryName: defaults.BinaryName(), + Type: "centralized", + LicenseID: LicenseID(), + }) +} + +// ReportUpgradeSucceeded reports that the upgrade has succeeded. +func ReportUpgradeSucceeded(ctx context.Context) { + Send(ctx, UpgradeSucceeded{ClusterID: ClusterID()}) +} + +// ReportUpgradeFailed reports that the upgrade has failed. +func ReportUpgradeFailed(ctx context.Context, err error) { + Send(ctx, UpgradeFailed{ClusterID(), err.Error()}) +} + +// ReportJoinStarted reports that a join has started. +func ReportJoinStarted(ctx context.Context, clusterID uuid.UUID) { + hostname, err := os.Hostname() + if err != nil { + logrus.Warnf("unable to get hostname: %s", err) + hostname = "unknown" + } + Send(ctx, JoinStarted{clusterID, hostname}) +} + +// ReportJoinSucceeded reports that a join has finished successfully. +func ReportJoinSucceeded(ctx context.Context, clusterID uuid.UUID) { + hostname, err := os.Hostname() + if err != nil { + logrus.Warnf("unable to get hostname: %s", err) + hostname = "unknown" + } + Send(ctx, JoinSucceeded{clusterID, hostname}) +} + +// ReportJoinFailed reports that a join has failed. +func ReportJoinFailed(ctx context.Context, clusterID uuid.UUID, exterr error) { + hostname, err := os.Hostname() + if err != nil { + logrus.Warnf("unable to get hostname: %s", err) + hostname = "unknown" + } + Send(ctx, JoinFailed{clusterID, hostname, exterr.Error()}) +} + +// ReportApplyStarted decides if we are going to report an InstallationStarted +// or an UpgradeStarted event and calls the appropriate function. If there has +// been provided a bundle directory through the command line it assumes this is +// a disconnected install and returns. +func ReportApplyStarted(c *cli.Context) { + if c.String("bundle-dir") != "" { + return + } + ctx, cancel := context.WithTimeout(c.Context, 5*time.Second) + defer cancel() + if isUpgrade { + ReportUpgradeStarted(ctx) + return + } + ReportInstallationStarted(ctx) +} + +// ReportApplyFinished decides if we are going to report an InstallationSucceeded, +// an InstallationFailed, an UpgradeSucceeded, or an UpgradeFailed event and calls +// the appropriate function. +func ReportApplyFinished(c *cli.Context, err error) { + if c.String("bundle-dir") != "" { + return + } + ctx, cancel := context.WithTimeout(c.Context, 5*time.Second) + defer cancel() + if err != nil { + if isUpgrade { + ReportUpgradeFailed(ctx, err) + return + } + ReportInstallationFailed(ctx, err) + return + } + if isUpgrade { + ReportUpgradeSucceeded(ctx) + return + } + ReportInstallationSucceeded(ctx) +} + +// ReportNodeUpgradeStarted reports that a node upgrade has started. +func ReportNodeUpgradeStarted(ctx context.Context) { + hostname, err := os.Hostname() + if err != nil { + logrus.Warnf("unable to get hostname: %s", err) + hostname = "unknown" + } + Send(ctx, NodeUpgradeStarted{ClusterID(), hostname}) +} + +// ReportNodeUpgradeSucceeded reports that a node upgrade has finished successfully. +func ReportNodeUpgradeSucceeded(ctx context.Context) { + hostname, err := os.Hostname() + if err != nil { + logrus.Warnf("unable to get hostname: %s", err) + hostname = "unknown" + } + Send(ctx, NodeUpgradeSucceeded{ClusterID(), hostname}) +} + +// ReportNodeUpgradeFailed reports that node upgrade has failed. +func ReportNodeUpgradeFailed(ctx context.Context, exterr error) { + hostname, err := os.Hostname() + if err != nil { + logrus.Warnf("unable to get hostname: %s", err) + hostname = "unknown" + } + Send(ctx, NodeUpgradeFailed{ClusterID(), hostname, exterr.Error()}) +} diff --git a/pkg/metrics/sender.go b/pkg/metrics/sender.go new file mode 100644 index 000000000..a36b16c17 --- /dev/null +++ b/pkg/metrics/sender.go @@ -0,0 +1,70 @@ +package metrics + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + "os" + + "github.com/sirupsen/logrus" + + "github.com/replicatedhq/helmvm/pkg/addons/adminconsole" +) + +// Send is a helper function that sends an event to the metrics endpoint. +// Metrics endpoint can be overwritten by the license.spec.endpoint field +// or by the HELMVM_METRICS_BASEURL environment variable, the latter has +// precedence over the former. +func Send(ctx context.Context, ev Event) { + var baseURL = "https://replicated.app" + var custom adminconsole.AdminConsoleCustomization + if license, err := custom.License(); err != nil { + logrus.Warnf("unable to read license: %s", err) + return + } else if license != nil && license.Spec.Endpoint != "" { + baseURL = license.Spec.Endpoint + } + if os.Getenv("HELMVM_METRICS_BASEURL") != "" { + baseURL = os.Getenv("HELMVM_METRICS_BASEURL") + } + sender := Sender{baseURL} + sender.Send(ctx, ev) +} + +// Sender sends events to the metrics endpoint. +type Sender struct { + baseURL string +} + +// Send sends an event to the metrics endpoint. +func (s *Sender) Send(ctx context.Context, ev Event) { + url := fmt.Sprintf("%s/helmbin_metrics/%s", s.baseURL, ev.Title()) + payload, err := s.payload(ev) + if err != nil { + logrus.Debugf("unable to get payload for event %s: %s", ev.Title(), err) + return + } + request, err := http.NewRequest(http.MethodPost, url, bytes.NewBuffer(payload)) + if err != nil { + logrus.Debugf("unable to create request for event %s: %s", ev.Title(), err) + return + } + request.Header.Set("Content-Type", "application/json") + response, err := http.DefaultClient.Do(request.WithContext(ctx)) + if err != nil { + logrus.Debugf("unable to send event %s: %s", ev.Title(), err) + return + } + defer response.Body.Close() + if response.StatusCode != http.StatusOK { + logrus.Debugf("unable to confirm event %s: %d", ev.Title(), response.StatusCode) + } +} + +// payload returns the payload to be sent to the metrics endpoint. +func (s *Sender) payload(ev Event) ([]byte, error) { + payload := map[string]Event{"event": ev} + return json.Marshal(payload) +} diff --git a/pkg/metrics/sender_test.go b/pkg/metrics/sender_test.go new file mode 100644 index 000000000..43f90eccb --- /dev/null +++ b/pkg/metrics/sender_test.go @@ -0,0 +1,142 @@ +package metrics + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "reflect" + "testing" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" +) + +func TestSend(t *testing.T) { + for _, tt := range []struct { + name string + event Event + }{ + { + name: "InstallationStarted", + event: InstallationStarted{ + ClusterID: uuid.New(), + Version: "1.2.3", + Flags: "foo", + BinaryName: "bar", + Type: "baz", + LicenseID: "qux", + }, + }, + { + name: "InstallationSucceeded", + event: InstallationSucceeded{ + ClusterID: uuid.New(), + }, + }, + { + name: "InstallationFailed", + event: InstallationFailed{ + ClusterID: uuid.New(), + Reason: "foo", + }, + }, + { + name: "UpgradeStarted", + event: UpgradeStarted{ + ClusterID: uuid.New(), + Version: "1.2.3", + Flags: "foo", + BinaryName: "bar", + Type: "baz", + LicenseID: "qux", + }, + }, + { + name: "UpgradeSucceeded", + event: UpgradeSucceeded{ + ClusterID: uuid.New(), + }, + }, + { + name: "UpgradeFailed", + event: InstallationFailed{ + ClusterID: uuid.New(), + Reason: "foo", + }, + }, + { + name: "JoinStarted", + event: JoinStarted{ + ClusterID: uuid.New(), + NodeName: "foo", + }, + }, + { + name: "JoinSucceeded", + event: JoinSucceeded{ + ClusterID: uuid.New(), + NodeName: "foo", + }, + }, + { + name: "JoinFailed", + event: JoinFailed{ + ClusterID: uuid.New(), + NodeName: "foo", + Reason: "bar", + }, + }, + { + name: "NodeUpgradeStarted", + event: NodeUpgradeStarted{ + ClusterID: uuid.New(), + NodeName: "foo", + }, + }, + { + name: "NodeUpgradeSucceeded", + event: NodeUpgradeSucceeded{ + ClusterID: uuid.New(), + NodeName: "foo", + }, + }, + { + name: "NodeUpgradeFailed", + event: NodeUpgradeFailed{ + ClusterID: uuid.New(), + NodeName: "foo", + Reason: "bar", + }, + }, + } { + t.Run(tt.name, func(t *testing.T) { + payload := map[string]Event{"event": tt.event} + expected, err := json.Marshal(payload) + assert.NoError(t, err) + server := httptest.NewServer( + http.HandlerFunc( + func(rw http.ResponseWriter, req *http.Request) { + evname := reflect.TypeOf(tt.event).Name() + path := fmt.Sprintf("/helmbin_metrics/%s", evname) + assert.Equal(t, req.URL.Path, path) + assert.Equal(t, "POST", req.Method) + received, err := io.ReadAll(req.Body) + assert.NoError(t, err) + assert.Equal(t, expected, received) + var decoded = map[string]interface{}{} + err = json.Unmarshal(received, &decoded) + assert.NoError(t, err) + assert.Contains(t, decoded, "event") + rw.Write([]byte(`OK`)) + }, + ), + ) + defer server.Close() + sender := Sender{baseURL: server.URL} + sender.Send(context.Background(), tt.event) + }) + } +} diff --git a/pkg/progressbar/progressbar.go b/pkg/progressbar/progressbar.go index 0d86305cf..5913eb1ae 100644 --- a/pkg/progressbar/progressbar.go +++ b/pkg/progressbar/progressbar.go @@ -11,7 +11,7 @@ import ( var blocks = []string{"◐", "◓", "◑", "◒"} -// WriterFn is a function that writes a formatted string. +// WriteFn is a function that writes a formatted string. type WriteFn func(string, ...any) (int, error) // MaskFn is a function that masks a message. Receives a string and