diff --git a/config/crds/troubleshoot.sh_analyzers.yaml b/config/crds/troubleshoot.sh_analyzers.yaml index 982aaf093..c18eff2e8 100644 --- a/config/crds/troubleshoot.sh_analyzers.yaml +++ b/config/crds/troubleshoot.sh_analyzers.yaml @@ -559,6 +559,57 @@ spec: required: - outcomes type: object + goldpinger: + properties: + annotations: + additionalProperties: + type: string + type: object + checkName: + type: string + collectorName: + type: string + exclude: + type: BoolString + filePath: + type: string + outcomes: + items: + properties: + fail: + properties: + message: + type: string + uri: + type: string + when: + type: string + type: object + pass: + properties: + message: + type: string + uri: + type: string + when: + type: string + type: object + warn: + properties: + message: + type: string + uri: + type: string + when: + type: string + type: object + type: object + type: array + strict: + type: BoolString + required: + - collectorName + type: object imagePullSecret: properties: annotations: diff --git a/config/crds/troubleshoot.sh_collectors.yaml b/config/crds/troubleshoot.sh_collectors.yaml index a9250ccea..203fe76f5 100644 --- a/config/crds/troubleshoot.sh_collectors.yaml +++ b/config/crds/troubleshoot.sh_collectors.yaml @@ -318,6 +318,35 @@ spec: - namespace - selector type: object + goldpinger: + properties: + collectorName: + type: string + exclude: + type: BoolString + namespace: + type: string + podLaunchOptions: + properties: + image: + type: string + imagePullSecret: + properties: + data: + additionalProperties: + type: string + type: object + name: + type: string + type: + type: string + type: object + namespace: + type: string + serviceAccountName: + type: string + type: object + type: object helm: properties: collectorName: diff --git a/config/crds/troubleshoot.sh_preflights.yaml b/config/crds/troubleshoot.sh_preflights.yaml index 949711ae7..b4f4edbd3 100644 --- a/config/crds/troubleshoot.sh_preflights.yaml +++ 
b/config/crds/troubleshoot.sh_preflights.yaml @@ -559,6 +559,57 @@ spec: required: - outcomes type: object + goldpinger: + properties: + annotations: + additionalProperties: + type: string + type: object + checkName: + type: string + collectorName: + type: string + exclude: + type: BoolString + filePath: + type: string + outcomes: + items: + properties: + fail: + properties: + message: + type: string + uri: + type: string + when: + type: string + type: object + pass: + properties: + message: + type: string + uri: + type: string + when: + type: string + type: object + warn: + properties: + message: + type: string + uri: + type: string + when: + type: string + type: object + type: object + type: array + strict: + type: BoolString + required: + - collectorName + type: object imagePullSecret: properties: annotations: @@ -1828,6 +1879,35 @@ spec: - namespace - selector type: object + goldpinger: + properties: + collectorName: + type: string + exclude: + type: BoolString + namespace: + type: string + podLaunchOptions: + properties: + image: + type: string + imagePullSecret: + properties: + data: + additionalProperties: + type: string + type: object + name: + type: string + type: + type: string + type: object + namespace: + type: string + serviceAccountName: + type: string + type: object + type: object helm: properties: collectorName: diff --git a/config/crds/troubleshoot.sh_supportbundles.yaml b/config/crds/troubleshoot.sh_supportbundles.yaml index 8b0cdb668..4d5928f23 100644 --- a/config/crds/troubleshoot.sh_supportbundles.yaml +++ b/config/crds/troubleshoot.sh_supportbundles.yaml @@ -590,6 +590,57 @@ spec: required: - outcomes type: object + goldpinger: + properties: + annotations: + additionalProperties: + type: string + type: object + checkName: + type: string + collectorName: + type: string + exclude: + type: BoolString + filePath: + type: string + outcomes: + items: + properties: + fail: + properties: + message: + type: string + uri: + type: string + when: + type: 
string + type: object + pass: + properties: + message: + type: string + uri: + type: string + when: + type: string + type: object + warn: + properties: + message: + type: string + uri: + type: string + when: + type: string + type: object + type: object + type: array + strict: + type: BoolString + required: + - collectorName + type: object imagePullSecret: properties: annotations: @@ -1859,6 +1910,35 @@ spec: - namespace - selector type: object + goldpinger: + properties: + collectorName: + type: string + exclude: + type: BoolString + namespace: + type: string + podLaunchOptions: + properties: + image: + type: string + imagePullSecret: + properties: + data: + additionalProperties: + type: string + type: object + name: + type: string + type: + type: string + type: object + namespace: + type: string + serviceAccountName: + type: string + type: object + type: object helm: properties: collectorName: diff --git a/internal/testutils/utils.go b/internal/testutils/utils.go index e743935dd..d0d2e6005 100644 --- a/internal/testutils/utils.go +++ b/internal/testutils/utils.go @@ -15,16 +15,25 @@ import ( func GetTestFixture(t *testing.T, path string) string { t.Helper() - p := path - if !filepath.IsAbs(path) { - p = filepath.Join("../../testdata", path) - } - b, err := os.ReadFile(p) + + b, err := os.ReadFile(TestFixtureFilePath(t, path)) require.NoError(t, err) return string(b) } -// FileDir returns the directory of the current source file. 
// IsInCluster reports whether the current process appears to be running
// inside a kubernetes pod.
//
// This is a best effort check and is not guaranteed to be accurate: it only
// verifies that both the KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT
// environment variables hold non-empty values.
func IsInCluster() bool {
	serviceHost := os.Getenv("KUBERNETES_SERVICE_HOST")
	servicePort := os.Getenv("KUBERNETES_SERVICE_PORT")

	return serviceHost != "" && servicePort != ""
}
*troubleshootv1beta2.GoldpingerAnalyze +} + +type checkAllOutput struct { + Hosts []struct { + HostIP string `json:"hostIP"` + PodIP string `json:"podIP"` + PodName string `json:"podName"` + } `json:"hosts"` + Responses map[string]struct { + HostIP string `json:"hostIP"` + PodIP string `json:"podIP"` + OK bool `json:"OK"` + Response struct { + PodResults map[string]struct { + HostIP string `json:"hostIP"` + OK bool `json:"OK"` + PingTime string `json:"pingTime"` + PodIP string `json:"podIP"` + Response struct { + BootTime string `json:"boot_time"` + } `json:"response"` + Error string `json:"error"` + ResponseTimeMS int `json:"response-time-ms"` + StatusCode int `json:"status-code"` + } `json:"podResults"` + } `json:"response"` + } `json:"responses"` +} + +func (a *AnalyzeGoldpinger) Title() string { + title := a.analyzer.CheckName + if title == "" { + title = a.collectorName() + } + + return title +} + +func (a *AnalyzeGoldpinger) IsExcluded() (bool, error) { + return isExcluded(a.analyzer.Exclude) +} + +func (a *AnalyzeGoldpinger) Analyze(getFile getCollectedFileContents, findFiles getChildCollectedFileContents) ([]*AnalyzeResult, error) { + caoFilePath := constants.GP_CHECK_ALL_RESULTS_PATH + // To allow analysing older support bundles, we can provide a custom file path + if a.analyzer.FilePath != "" { + caoFilePath = a.analyzer.FilePath + } + + collected, err := getFile(caoFilePath) + if err != nil { + return nil, errors.Wrapf(err, "failed to read collected file path: %q", caoFilePath) + } + + var cao checkAllOutput + err = json.Unmarshal(collected, &cao) + if err != nil { + return nil, errors.Wrap(err, "failed to unmarshal collected goldpinger output") + } + + return a.podPingsAnalysis(&cao), nil +} + +func (a *AnalyzeGoldpinger) collectorName() string { + if a.analyzer.CollectorName != "" { + return a.analyzer.CollectorName + } + return "goldpinger" +} + +func (a *AnalyzeGoldpinger) podPingsAnalysis(cao *checkAllOutput) []*AnalyzeResult { + results := 
[]*AnalyzeResult{} + + for _, host := range cao.Hosts { + // Check if the pod from a host has any ping errors from other pods + targetPod := host.PodName + pingsSucceeded := true + for srcPod, resp := range cao.Responses { + res := &AnalyzeResult{ + IconKey: "kubernetes", + Strict: a.analyzer.Strict.BoolOrDefaultFalse(), + } + + // Get ping result for the pod + podResult, ok := resp.Response.PodResults[targetPod] + if !ok { + // Pod not found in ping results from the source pod + res.IsWarn = true + res.Title = fmt.Sprintf("Missing ping results for %q pod", targetPod) + res.Message = fmt.Sprintf("Ping result for %q pod from %q pod is missing", targetPod, srcPod) + pingsSucceeded = false + + results = append(results, res) + continue + } + + if !podResult.OK { + // Ping was not successful + res.IsFail = true + res.Title = fmt.Sprintf("Ping from %q pod to %q pod failed", srcPod, targetPod) + res.Message = fmt.Sprintf("Ping error: %s", podResult.Error) + pingsSucceeded = false + + results = append(results, res) + continue + } + } + + // If all pings succeeded, add a pass result + if pingsSucceeded { + results = append(results, &AnalyzeResult{ + IconKey: "kubernetes", + Strict: a.analyzer.Strict.BoolOrDefaultFalse(), + IsPass: true, + Title: fmt.Sprintf("Pings to %q pod succeeded", targetPod), + Message: fmt.Sprintf("Pings to %q pod from all other pods in the cluster succeeded", targetPod), + }) + } + } + + return results +} diff --git a/pkg/analyze/goldpinger_test.go b/pkg/analyze/goldpinger_test.go new file mode 100644 index 000000000..ad80adb02 --- /dev/null +++ b/pkg/analyze/goldpinger_test.go @@ -0,0 +1,125 @@ +package analyzer + +import ( + "encoding/json" + "testing" + + "github.com/replicatedhq/troubleshoot/internal/testutils" + troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestUnmarshallingCheckAllResults(t *testing.T) { + s := 
testutils.GetTestFixture(t, "goldpinger/checkall-with-error.json") + var res checkAllOutput + err := json.Unmarshal([]byte(s), &res) + require.NoError(t, err) + assert.Len(t, res.Hosts, 3) + assert.Equal(t, "goldpinger-tbdsb", res.Hosts[1].PodName) + assert.Equal(t, "10.32.2.2", res.Responses["goldpinger-4hctt"].Response.PodResults["goldpinger-jj9mw"].PodIP) + assert.Equal(t, + `Get "http://10.32.0.9:80/ping": context deadline exceeded`, + res.Responses["goldpinger-tbdsb"].Response.PodResults["goldpinger-4hctt"].Error, + ) +} + +func TestAnalyzeGoldpinger_podPingsAnalysis(t *testing.T) { + tests := []struct { + name string + cao *checkAllOutput + want []*AnalyzeResult + }{ + { + name: "no ping errors", + cao: caoFixture(t, "goldpinger/checkall-success.json"), + want: []*AnalyzeResult{ + { + Title: "Pings to \"goldpinger-kpz4g\" pod succeeded", + Message: "Pings to \"goldpinger-kpz4g\" pod from all other pods in the cluster succeeded", + IconKey: "kubernetes", + IsPass: true, + }, + { + Title: "Pings to \"goldpinger-k6d2j\" pod succeeded", + Message: "Pings to \"goldpinger-k6d2j\" pod from all other pods in the cluster succeeded", + IconKey: "kubernetes", + IsPass: true, + }, + { + Title: "Pings to \"goldpinger-5ck4d\" pod succeeded", + Message: "Pings to \"goldpinger-5ck4d\" pod from all other pods in the cluster succeeded", + IconKey: "kubernetes", + IsPass: true, + }, + }, + }, + { + name: "with some ping errors", + cao: caoFixture(t, "goldpinger/checkall-with-error.json"), + want: []*AnalyzeResult{ + { + Title: "Ping from \"goldpinger-jj9mw\" pod to \"goldpinger-4hctt\" pod failed", + Message: "Ping error: Get \"http://10.32.0.9:80/ping\": context deadline exceeded", + IconKey: "kubernetes", + IsFail: true, + }, + { + Title: "Ping from \"goldpinger-jj9mw\" pod to \"goldpinger-tbdsb\" pod failed", + Message: "Ping error: Get \"http://10.32.1.2:80/ping\": context deadline exceeded", + IconKey: "kubernetes", + IsFail: true, + }, + { + Title: "Ping from 
\"goldpinger-4hctt\" pod to \"goldpinger-jj9mw\" pod failed", + Message: "Ping error: Get \"http://10.32.2.2:80/ping\": context deadline exceeded", + IconKey: "kubernetes", + IsFail: true, + }, + { + Title: "Ping from \"goldpinger-4hctt\" pod to \"goldpinger-tbdsb\" pod failed", + Message: "Ping error: Get \"http://10.32.1.2:80/ping\": context deadline exceeded", + IconKey: "kubernetes", + IsFail: true, + }, + { + Title: "Ping from \"goldpinger-tbdsb\" pod to \"goldpinger-4hctt\" pod failed", + Message: "Ping error: Get \"http://10.32.0.9:80/ping\": context deadline exceeded", + IconKey: "kubernetes", + IsFail: true, + }, + { + Title: "Ping from \"goldpinger-tbdsb\" pod to \"goldpinger-jj9mw\" pod failed", + Message: "Ping error: Get \"http://10.32.2.2:80/ping\": context deadline exceeded", + IconKey: "kubernetes", + IsFail: true, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := &AnalyzeGoldpinger{ + analyzer: &troubleshootv1beta2.GoldpingerAnalyze{}, + } + + got := a.podPingsAnalysis(tt.cao) + // Check existence of each want. 
// GoldpingerAnalyze is the spec of the goldpinger analyzer, which evaluates
// the check_all output gathered by the goldpinger collector.
type GoldpingerAnalyze struct {
	AnalyzeMeta `json:",inline" yaml:",inline"`
	// Outcomes is optional for this analyzer.
	// NOTE(review): the ping analysis in pkg/analyze/goldpinger.go builds its
	// results without consulting Outcomes - confirm whether they are meant to
	// be honoured.
	Outcomes []*Outcome `json:"outcomes,omitempty" yaml:"outcomes,omitempty"`
	// CollectorName is used as the analyzer title when CheckName is empty;
	// the analyzer falls back to "goldpinger" when this is empty too.
	CollectorName string `json:"collectorName" yaml:"collectorName"`
	// FilePath overrides the default location of the collected check_all
	// results, which allows analysing older support bundles.
	FilePath string `json:"filePath,omitempty" yaml:"filePath,omitempty"`
}
// Goldpinger is the spec of the collector that gathers the output of the
// check_all endpoint of a goldpinger service running in the cluster.
type Goldpinger struct {
	CollectorMeta `json:",inline" yaml:",inline"`
	// Namespace is the namespace of the goldpinger service to query. The
	// collector falls back to its default goldpinger namespace when empty.
	Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"`
	// PodLaunchOptions tunes the pod that is launched to query the service
	// when the collector itself is not running inside the cluster.
	PodLaunchOptions *PodLaunchOptions `json:"podLaunchOptions,omitempty" yaml:"podLaunchOptions,omitempty"`
}

// PodLaunchOptions holds optional settings for a pod launched by a collector.
// Every field may be left empty, in which case the collector picks a default.
type PodLaunchOptions struct {
	// Namespace is the namespace the pod is created in.
	Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"`
	// Image is the container image the pod runs.
	Image string `json:"image,omitempty" yaml:"image,omitempty"`
	// ImagePullSecret is the pull secret used to fetch Image.
	ImagePullSecret *ImagePullSecrets `json:"imagePullSecret,omitempty" yaml:"imagePullSecret,omitempty"`
	// ServiceAccountName is the service account the pod runs as.
	ServiceAccountName string `json:"serviceAccountName,omitempty" yaml:"serviceAccountName,omitempty"`
}
Analyze. @@ -892,6 +897,11 @@ func (in *Collect) DeepCopyInto(out *Collect) { *out = new(Helm) (*in).DeepCopyInto(*out) } + if in.Goldpinger != nil { + in, out := &in.Goldpinger, &out.Goldpinger + *out = new(Goldpinger) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Collect. @@ -1508,6 +1518,54 @@ func (in *Get) DeepCopy() *Get { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Goldpinger) DeepCopyInto(out *Goldpinger) { + *out = *in + in.CollectorMeta.DeepCopyInto(&out.CollectorMeta) + if in.PodLaunchOptions != nil { + in, out := &in.PodLaunchOptions, &out.PodLaunchOptions + *out = new(PodLaunchOptions) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Goldpinger. +func (in *Goldpinger) DeepCopy() *Goldpinger { + if in == nil { + return nil + } + out := new(Goldpinger) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GoldpingerAnalyze) DeepCopyInto(out *GoldpingerAnalyze) { + *out = *in + in.AnalyzeMeta.DeepCopyInto(&out.AnalyzeMeta) + if in.Outcomes != nil { + in, out := &in.Outcomes, &out.Outcomes + *out = make([]*Outcome, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(Outcome) + (*in).DeepCopyInto(*out) + } + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GoldpingerAnalyze. +func (in *GoldpingerAnalyze) DeepCopy() *GoldpingerAnalyze { + if in == nil { + return nil + } + out := new(GoldpingerAnalyze) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *HTTP) DeepCopyInto(out *HTTP) { *out = *in @@ -2989,6 +3047,26 @@ func (in *Outcome) DeepCopy() *Outcome { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PodLaunchOptions) DeepCopyInto(out *PodLaunchOptions) { + *out = *in + if in.ImagePullSecret != nil { + in, out := &in.ImagePullSecret, &out.ImagePullSecret + *out = new(ImagePullSecrets) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLaunchOptions. +func (in *PodLaunchOptions) DeepCopy() *PodLaunchOptions { + if in == nil { + return nil + } + out := new(PodLaunchOptions) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Post) DeepCopyInto(out *Post) { *out = *in diff --git a/pkg/collect/collector.go b/pkg/collect/collector.go index f6881daf8..070bc642f 100644 --- a/pkg/collect/collector.go +++ b/pkg/collect/collector.go @@ -116,6 +116,8 @@ func GetCollector(collector *troubleshootv1beta2.Collect, bundlePath string, nam return &CollectCertificates{collector.Certificates, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true case collector.Helm != nil: return &CollectHelm{collector.Helm, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true + case collector.Goldpinger != nil: + return &CollectGoldpinger{collector.Goldpinger, bundlePath, namespace, clientConfig, client, ctx, RBACErrors}, true default: return nil, false } @@ -198,6 +200,8 @@ func getCollectorName(c interface{}) string { collector = "certificates" case *CollectHelm: collector = "helm" + case *CollectGoldpinger: + collector = "goldpinger" default: collector = "" } diff --git a/pkg/collect/exec.go b/pkg/collect/exec.go index f41e681af..b2c6a4d56 100644 --- a/pkg/collect/exec.go +++ b/pkg/collect/exec.go @@ -47,6 +47,7 @@ func (c 
*CollectExec) Collect(progressChan chan<- interface{}) (CollectorResult, errCh := make(chan error, 1) resultCh := make(chan CollectorResult, 1) + // TODO: Use a context with timeout instead of a goroutine go func() { b, err := execWithoutTimeout(c.ClientConfig, c.BundlePath, c.Collector) if err != nil { @@ -78,12 +79,12 @@ func execWithoutTimeout(clientConfig *rest.Config, bundlePath string, execCollec pods, podsErrors := listPodsInSelectors(ctx, client, execCollector.Namespace, execCollector.Selector) if len(podsErrors) > 0 { - output.SaveResult(bundlePath, getExecErrosFileName(execCollector), marshalErrors(podsErrors)) + output.SaveResult(bundlePath, getExecErrorsFileName(execCollector), marshalErrors(podsErrors)) } if len(pods) > 0 { for _, pod := range pods { - stdout, stderr, execErrors := getExecOutputs(clientConfig, client, pod, execCollector) + stdout, stderr, execErrors := getExecOutputs(ctx, clientConfig, client, pod, execCollector) path := filepath.Join(execCollector.Name, pod.Namespace, pod.Name) if len(stdout) > 0 { @@ -103,7 +104,9 @@ func execWithoutTimeout(clientConfig *rest.Config, bundlePath string, execCollec return output, nil } -func getExecOutputs(clientConfig *rest.Config, client *kubernetes.Clientset, pod corev1.Pod, execCollector *troubleshootv1beta2.Exec) ([]byte, []byte, []string) { +func getExecOutputs( + ctx context.Context, clientConfig *rest.Config, client *kubernetes.Clientset, pod corev1.Pod, execCollector *troubleshootv1beta2.Exec, +) ([]byte, []byte, []string) { container := pod.Spec.Containers[0].Name if execCollector.ContainerName != "" { container = execCollector.ContainerName @@ -133,7 +136,7 @@ func getExecOutputs(clientConfig *rest.Config, client *kubernetes.Clientset, pod stdout := new(bytes.Buffer) stderr := new(bytes.Buffer) - err = exec.Stream(remotecommand.StreamOptions{ + err = exec.StreamWithContext(ctx, remotecommand.StreamOptions{ Stdin: nil, Stdout: stdout, Stderr: stderr, @@ -147,7 +150,7 @@ func 
// CollectGoldpinger collects goldpinger results from a goldpinger service
// running in a cluster by querying its /check_all endpoint.
//
// The response is stored in the bundle at constants.GP_CHECK_ALL_RESULTS_PATH.
// When the endpoint cannot be queried, the failure is recorded in
// goldpinger/error.txt instead.
type CollectGoldpinger struct {
	// Collector is the goldpinger collector spec being executed
	Collector *troubleshootv1beta2.Goldpinger
	// BundlePath is the bundle location handed to SaveResult when storing output
	BundlePath string
	// Namespace is forwarded to the run pod collector used outside the cluster
	Namespace string
	// ClientConfig is forwarded to the run pod collector used outside the cluster
	ClientConfig *rest.Config
	// Client is used to verify the configured service account and is
	// forwarded to the run pod collector
	Client kubernetes.Interface
	// Context is attached to the HTTP request and the kubernetes API calls
	Context context.Context
	RBACErrors
}
query goldpinger endpoint in cluster: %v", err) + klog.V(2).Infof(errMsg) + err = output.SaveResult(c.BundlePath, "goldpinger/error.txt", bytes.NewBuffer([]byte(errMsg))) + return output, err + } + } else { + klog.V(2).Infof("Launch pod to query goldpinger endpoint then collect results from pod logs") + results, err = c.runPodAndCollectGPResults(progressChan) + if err != nil { + errMsg := fmt.Sprintf("Failed to run pod to collect goldpinger results: %v", err) + klog.V(2).Infof(errMsg) + err = output.SaveResult(c.BundlePath, "goldpinger/error.txt", bytes.NewBuffer([]byte(errMsg))) + return output, err + } + } + + err = output.SaveResult(c.BundlePath, constants.GP_CHECK_ALL_RESULTS_PATH, bytes.NewBuffer(results)) + return output, err +} + +func (c *CollectGoldpinger) fetchCheckAllOutput() ([]byte, error) { + client := &http.Client{ + Timeout: time.Minute, // Long enough timeout + } + + req, err := http.NewRequestWithContext(c.Context, "GET", c.endpoint(), nil) + if err != nil { + return nil, err + } + resp, err := client.Do(req) + if err != nil { + return nil, err + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, err + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("unexpected status code: %d, body: %s", resp.StatusCode, string(body)) + } + + return body, nil +} + +func (c *CollectGoldpinger) runPodAndCollectGPResults(progressChan chan<- interface{}) ([]byte, error) { + rest.InClusterConfig() + + namespace := "default" + serviceAccountName := "" + image := constants.GP_DEFAULT_IMAGE + var imagePullSecret *troubleshootv1beta2.ImagePullSecrets + + if c.Collector.PodLaunchOptions != nil { + if c.Collector.PodLaunchOptions.Namespace != "" { + namespace = c.Collector.PodLaunchOptions.Namespace + } + + if c.Collector.PodLaunchOptions.ServiceAccountName != "" { + serviceAccountName = c.Collector.PodLaunchOptions.ServiceAccountName + if err := checkForExistingServiceAccount(c.Context, c.Client, 
namespace, serviceAccountName); err != nil { + return nil, err + } + } + + if c.Collector.PodLaunchOptions.Image != "" { + image = c.Collector.PodLaunchOptions.Image + } + imagePullSecret = c.Collector.PodLaunchOptions.ImagePullSecret + } + + runPodCollectorName := "ts-goldpinger-collector" + collectorContainerName := "collector" + runPodSpec := &troubleshootv1beta2.RunPod{ + CollectorMeta: troubleshootv1beta2.CollectorMeta{ + CollectorName: runPodCollectorName, + }, + Name: runPodCollectorName, + Namespace: namespace, + Timeout: time.Minute.String(), + ImagePullSecret: imagePullSecret, + PodSpec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyNever, + ServiceAccountName: serviceAccountName, + Containers: []corev1.Container{ + { + Image: image, + ImagePullPolicy: corev1.PullIfNotPresent, + Name: collectorContainerName, + Command: []string{"wget"}, + Args: []string{"-q", "-O-", c.endpoint()}, + Resources: corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("50m"), + corev1.ResourceMemory: resource.MustParse("64Mi"), + }, + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("200m"), + corev1.ResourceMemory: resource.MustParse("256Mi"), + }, + }, + }, + }, + }, + } + + rbacErrors := c.GetRBACErrors() + // Pass an empty bundle path since we don't need to save the results + runPodCollector := &CollectRunPod{runPodSpec, "", c.Namespace, c.ClientConfig, c.Client, c.Context, rbacErrors} + + output, err := runPodCollector.Collect(progressChan) + if err != nil { + return nil, err + } + + // Check if the collector container exited with an error + var pod corev1.Pod + err = json.Unmarshal(output[fmt.Sprintf("%s/%s.json", runPodCollectorName, runPodCollectorName)], &pod) + if err != nil { + return nil, err + } + + var terminationError *corev1.ContainerStateTerminated + for _, status := range pod.Status.ContainerStatuses { + if status.Name == collectorContainerName { + if status.State.Terminated.ExitCode 
!= 0 { + terminationError = status.State.Terminated + } + } + } + + podLogs := output[fmt.Sprintf("%s/%s.log", runPodCollectorName, runPodCollectorName)] + if terminationError != nil { + m := map[string]string{ + "podName": pod.Name, + "exitCode": strconv.Itoa(int(terminationError.ExitCode)), + "reason": terminationError.Reason, + "message": terminationError.Message, + "logs": string(podLogs), + } + + b, err := json.MarshalIndent(m, "", " ") + if err != nil { + return nil, err + } + return nil, errors.New(string(b)) + } + return podLogs, nil +} + +func (c *CollectGoldpinger) endpoint() string { + namespace := c.Collector.Namespace + if namespace == "" { + namespace = constants.GP_DEFAULT_NAMESPACE + } + + return fmt.Sprintf("http://goldpinger.%s.svc.cluster.local:80/check_all", namespace) +} diff --git a/pkg/collect/run.go b/pkg/collect/run.go index e4155ea31..629ae1ef1 100644 --- a/pkg/collect/run.go +++ b/pkg/collect/run.go @@ -43,7 +43,7 @@ func (c *CollectRun) Collect(progressChan chan<- interface{}) (CollectorResult, serviceAccountName = c.Collector.ServiceAccountName } - if err := checkForExistingServiceAccount(c.Client, namespace, serviceAccountName); err != nil { + if err := checkForExistingServiceAccount(c.Context, c.Client, namespace, serviceAccountName); err != nil { return nil, err } diff --git a/pkg/collect/run_pod.go b/pkg/collect/run_pod.go index cabc3ed82..c2263eb47 100644 --- a/pkg/collect/run_pod.go +++ b/pkg/collect/run_pod.go @@ -6,7 +6,7 @@ import ( "encoding/base64" "encoding/json" "fmt" - "io/ioutil" + "io" "path/filepath" "sync" "time" @@ -44,8 +44,9 @@ func (c *CollectRunPod) IsExcluded() (bool, error) { return isExcluded(c.Collector.Exclude) } -func (c *CollectRunPod) Collect(progressChan chan<- interface{}) (CollectorResult, error) { +func (c *CollectRunPod) Collect(progressChan chan<- interface{}) (result CollectorResult, err error) { ctx := context.Background() + result = NewResult() client, err := 
kubernetes.NewForConfig(c.ClientConfig) if err != nil { @@ -70,8 +71,6 @@ func (c *CollectRunPod) Collect(progressChan chan<- interface{}) (CollectorResul }() } - result := NewResult() - defer func() { result, err = savePodDetails(ctx, client, result, c.BundlePath, c.ClientConfig, pod, c.Collector) if err != nil { @@ -94,6 +93,7 @@ func (c *CollectRunPod) Collect(progressChan chan<- interface{}) (CollectorResul timeoutCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() + // TODO: Use context with timeout instead of go routine go func() { b, err := runWithoutTimeout(timeoutCtx, c.BundlePath, c.ClientConfig, pod, c.Collector) if err != nil { @@ -406,7 +406,7 @@ func RunPodLogs(ctx context.Context, client v1.CoreV1Interface, podSpec *corev1. } defer logs.Close() - return ioutil.ReadAll(logs) + return io.ReadAll(logs) } func savePodDetails(ctx context.Context, client *kubernetes.Clientset, output CollectorResult, bundlePath string, clientConfig *rest.Config, pod *corev1.Pod, runPodCollector *troubleshootv1beta2.RunPod) (CollectorResult, error) { diff --git a/pkg/collect/runner.go b/pkg/collect/runner.go index 5af87141b..0d62d4add 100644 --- a/pkg/collect/runner.go +++ b/pkg/collect/runner.go @@ -36,7 +36,7 @@ type podRunner struct { } func (r *podRunner) run(ctx context.Context, collector *troubleshootv1beta2.HostCollect, namespace string, name string, nodeName string, results chan<- map[string][]byte) error { - cm, pod, err := CreateCollector(r.client, r.scheme, nil, name, namespace, nodeName, runnerServiceAccountName, runnerJobType, collector, r.image, r.pullPolicy) + cm, pod, err := CreateCollector(ctx, r.client, r.scheme, nil, name, namespace, nodeName, runnerServiceAccountName, runnerJobType, collector, r.image, r.pullPolicy) if err != nil { return errors.Wrap(err, "failed to create collector") } @@ -62,13 +62,13 @@ func (r *podRunner) run(ctx context.Context, collector *troubleshootv1beta2.Host return nil } -func CreateCollector(client 
*kubernetes.Clientset, scheme *runtime.Scheme, ownerRef metav1.Object, name string, namespace string, nodeName string, serviceAccountName string, jobType string, collect *troubleshootv1beta2.HostCollect, image string, pullPolicy string) (*corev1.ConfigMap, *corev1.Pod, error) { +func CreateCollector(ctx context.Context, client *kubernetes.Clientset, scheme *runtime.Scheme, ownerRef metav1.Object, name string, namespace string, nodeName string, serviceAccountName string, jobType string, collect *troubleshootv1beta2.HostCollect, image string, pullPolicy string) (*corev1.ConfigMap, *corev1.Pod, error) { configMap, err := createCollectorConfigMap(client, scheme, ownerRef, name, namespace, collect) if err != nil { return nil, nil, err } - pod, err := createCollectorPod(client, scheme, ownerRef, name, namespace, nodeName, serviceAccountName, jobType, collect, configMap, image, pullPolicy) + pod, err := createCollectorPod(ctx, client, scheme, ownerRef, name, namespace, nodeName, serviceAccountName, jobType, collect, configMap, image, pullPolicy) if err != nil { return nil, nil, err } @@ -143,19 +143,19 @@ func createCollectorConfigMap(client *kubernetes.Clientset, scheme *runtime.Sche return created, nil } -func createCollectorPod(client kubernetes.Interface, scheme *runtime.Scheme, ownerRef metav1.Object, name string, namespace string, nodeName string, serviceAccountName string, jobType string, collect *troubleshootv1beta2.HostCollect, configMap *corev1.ConfigMap, image string, pullPolicy string) (*corev1.Pod, error) { +func createCollectorPod(ctx context.Context, client kubernetes.Interface, scheme *runtime.Scheme, ownerRef metav1.Object, name string, namespace string, nodeName string, serviceAccountName string, jobType string, collect *troubleshootv1beta2.HostCollect, configMap *corev1.ConfigMap, image string, pullPolicy string) (*corev1.Pod, error) { if serviceAccountName == "" { serviceAccountName = "default" } - _, err := 
client.CoreV1().Pods(namespace).Get(context.Background(), name, metav1.GetOptions{}) + _, err := client.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{}) if err == nil { return nil, fmt.Errorf("pod %q already exists", name) } else if !kuberneteserrors.IsNotFound(err) { return nil, err } - if err := checkForExistingServiceAccount(client, namespace, serviceAccountName); err != nil { + if err := checkForExistingServiceAccount(ctx, client, namespace, serviceAccountName); err != nil { return nil, err } @@ -261,7 +261,7 @@ func createCollectorPod(client kubernetes.Interface, scheme *runtime.Scheme, own var created *corev1.Pod createFn := func() error { - created, err = client.CoreV1().Pods(namespace).Create(context.Background(), &pod, metav1.CreateOptions{}) + created, err = client.CoreV1().Pods(namespace).Create(ctx, &pod, metav1.CreateOptions{}) if err != nil && !kerrors.IsAlreadyExists(err) { return err } diff --git a/pkg/collect/util.go b/pkg/collect/util.go index 2cae17bf0..5716380df 100644 --- a/pkg/collect/util.go +++ b/pkg/collect/util.go @@ -241,8 +241,8 @@ func getTLSParamsFromSecret(ctx context.Context, client kubernetes.Interface, se return caCert, clientCert, clientKey, nil } -func checkForExistingServiceAccount(client kubernetes.Interface, namespace string, serviceAccountName string) error { - _, err := client.CoreV1().ServiceAccounts(namespace).Get(context.Background(), serviceAccountName, metav1.GetOptions{}) +func checkForExistingServiceAccount(ctx context.Context, client kubernetes.Interface, namespace string, serviceAccountName string) error { + _, err := client.CoreV1().ServiceAccounts(namespace).Get(ctx, serviceAccountName, metav1.GetOptions{}) if err != nil { return errors.Wrapf(err, "Failed to get service account %s", serviceAccountName) diff --git a/pkg/collect/util_test.go b/pkg/collect/util_test.go index 21cdec4b2..1502da58c 100644 --- a/pkg/collect/util_test.go +++ b/pkg/collect/util_test.go @@ -323,11 +323,11 @@ func 
Test_checkForExistingServiceAccount(t *testing.T) { _, err := client.CoreV1().ServiceAccounts(tt.namespace).Create(ctx, tt.mockServiceAccount, metav1.CreateOptions{}) require.NoError(t, err) - err = checkForExistingServiceAccount(client, tt.namespace, tt.serviceAccountName) + err = checkForExistingServiceAccount(ctx, client, tt.namespace, tt.serviceAccountName) assert.Equal(t, tt.wantErr, err != nil) } - err := checkForExistingServiceAccount(client, tt.namespace, tt.serviceAccountName) + err := checkForExistingServiceAccount(ctx, client, tt.namespace, tt.serviceAccountName) assert.Equal(t, tt.wantErr, err != nil) }) } diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index e69a6986f..4631b0126 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -91,4 +91,15 @@ const ( // This is the muximum size the buffer can grow to // Its not what the buffer will be allocated to initially SCANNER_MAX_SIZE = 10 * 1024 * 1024 // 10MB + + // Goldpinger constants + GP_CHECK_ALL_RESULTS_PATH = "goldpinger/check_all.json" + + // GP_DEFAULT_IMAGE is the default image used for goldpinger + // "replicated/kurl-util" would be better + // since its always in airgap envs, but its tagged + // with the kurl versions which would not work since they + // are not always the same + GP_DEFAULT_IMAGE = "alpine:3" + GP_DEFAULT_NAMESPACE = "default" ) diff --git a/schemas/analyzer-troubleshoot-v1beta2.json b/schemas/analyzer-troubleshoot-v1beta2.json index 592a4c94e..f9f12b937 100644 --- a/schemas/analyzer-troubleshoot-v1beta2.json +++ b/schemas/analyzer-troubleshoot-v1beta2.json @@ -824,6 +824,85 @@ } } }, + "goldpinger": { + "type": "object", + "required": [ + "collectorName" + ], + "properties": { + "annotations": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "checkName": { + "type": "string" + }, + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + 
"filePath": { + "type": "string" + }, + "outcomes": { + "type": "array", + "items": { + "type": "object", + "properties": { + "fail": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "uri": { + "type": "string" + }, + "when": { + "type": "string" + } + } + }, + "pass": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "uri": { + "type": "string" + }, + "when": { + "type": "string" + } + } + }, + "warn": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "uri": { + "type": "string" + }, + "when": { + "type": "string" + } + } + } + } + } + }, + "strict": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + } + } + }, "imagePullSecret": { "type": "object", "required": [ diff --git a/schemas/collector-troubleshoot-v1beta2.json b/schemas/collector-troubleshoot-v1beta2.json index 2cdbfee9f..befed5e47 100644 --- a/schemas/collector-troubleshoot-v1beta2.json +++ b/schemas/collector-troubleshoot-v1beta2.json @@ -435,6 +435,51 @@ } } }, + "goldpinger": { + "type": "object", + "properties": { + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "namespace": { + "type": "string" + }, + "podLaunchOptions": { + "type": "object", + "properties": { + "image": { + "type": "string" + }, + "imagePullSecret": { + "type": "object", + "properties": { + "data": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "name": { + "type": "string" + }, + "type": { + "type": "string" + } + } + }, + "namespace": { + "type": "string" + }, + "serviceAccountName": { + "type": "string" + } + } + } + } + }, "helm": { "type": "object", "properties": { diff --git a/schemas/preflight-troubleshoot-v1beta2.json b/schemas/preflight-troubleshoot-v1beta2.json index db19376bd..52b951024 100644 --- a/schemas/preflight-troubleshoot-v1beta2.json +++ b/schemas/preflight-troubleshoot-v1beta2.json @@ -824,6 +824,85 @@ } } }, + 
"goldpinger": { + "type": "object", + "required": [ + "collectorName" + ], + "properties": { + "annotations": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "checkName": { + "type": "string" + }, + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "filePath": { + "type": "string" + }, + "outcomes": { + "type": "array", + "items": { + "type": "object", + "properties": { + "fail": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "uri": { + "type": "string" + }, + "when": { + "type": "string" + } + } + }, + "pass": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "uri": { + "type": "string" + }, + "when": { + "type": "string" + } + } + }, + "warn": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "uri": { + "type": "string" + }, + "when": { + "type": "string" + } + } + } + } + } + }, + "strict": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + } + } + }, "imagePullSecret": { "type": "object", "required": [ @@ -2761,6 +2840,51 @@ } } }, + "goldpinger": { + "type": "object", + "properties": { + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "namespace": { + "type": "string" + }, + "podLaunchOptions": { + "type": "object", + "properties": { + "image": { + "type": "string" + }, + "imagePullSecret": { + "type": "object", + "properties": { + "data": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "name": { + "type": "string" + }, + "type": { + "type": "string" + } + } + }, + "namespace": { + "type": "string" + }, + "serviceAccountName": { + "type": "string" + } + } + } + } + }, "helm": { "type": "object", "properties": { diff --git a/schemas/supportbundle-troubleshoot-v1beta2.json b/schemas/supportbundle-troubleshoot-v1beta2.json index 90f50a657..dde428bfd 100644 --- 
a/schemas/supportbundle-troubleshoot-v1beta2.json +++ b/schemas/supportbundle-troubleshoot-v1beta2.json @@ -870,6 +870,85 @@ } } }, + "goldpinger": { + "type": "object", + "required": [ + "collectorName" + ], + "properties": { + "annotations": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "checkName": { + "type": "string" + }, + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "filePath": { + "type": "string" + }, + "outcomes": { + "type": "array", + "items": { + "type": "object", + "properties": { + "fail": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "uri": { + "type": "string" + }, + "when": { + "type": "string" + } + } + }, + "pass": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "uri": { + "type": "string" + }, + "when": { + "type": "string" + } + } + }, + "warn": { + "type": "object", + "properties": { + "message": { + "type": "string" + }, + "uri": { + "type": "string" + }, + "when": { + "type": "string" + } + } + } + } + } + }, + "strict": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + } + } + }, "imagePullSecret": { "type": "object", "required": [ @@ -2807,6 +2886,51 @@ } } }, + "goldpinger": { + "type": "object", + "properties": { + "collectorName": { + "type": "string" + }, + "exclude": { + "oneOf": [{"type": "string"},{"type": "boolean"}] + }, + "namespace": { + "type": "string" + }, + "podLaunchOptions": { + "type": "object", + "properties": { + "image": { + "type": "string" + }, + "imagePullSecret": { + "type": "object", + "properties": { + "data": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "name": { + "type": "string" + }, + "type": { + "type": "string" + } + } + }, + "namespace": { + "type": "string" + }, + "serviceAccountName": { + "type": "string" + } + } + } + } + }, "helm": { "type": "object", "properties": { diff --git 
a/test/e2e/support-bundle/cluster_pod_statuses_e2e_test.go b/test/e2e/support-bundle/cluster_pod_statuses_e2e_test.go index 522fdf8fa..9423099b3 100644 --- a/test/e2e/support-bundle/cluster_pod_statuses_e2e_test.go +++ b/test/e2e/support-bundle/cluster_pod_statuses_e2e_test.go @@ -42,7 +42,7 @@ func TestDeploymentPod(t *testing.T) { tarPath := fmt.Sprintf("%s.tar.gz", supportBundleName) targetFile := fmt.Sprintf("%s/analysis.json", supportBundleName) - cmd := exec.Command("../../../bin/support-bundle", "spec/pod.yaml", "--interactive=false", fmt.Sprintf("-o=%s", supportBundleName)) + cmd := exec.CommandContext(ctx, sbBinary(), "spec/pod.yaml", "--interactive=false", fmt.Sprintf("-o=%s", supportBundleName)) cmd.Stdout = &out err := cmd.Run() if err != nil { diff --git a/test/e2e/support-bundle/cluster_resources_e2e_test.go b/test/e2e/support-bundle/cluster_resources_e2e_test.go index 006d9ca00..1849194c8 100644 --- a/test/e2e/support-bundle/cluster_resources_e2e_test.go +++ b/test/e2e/support-bundle/cluster_resources_e2e_test.go @@ -69,7 +69,7 @@ func TestClusterResources(t *testing.T) { supportBundleName := "cluster-resources" tarPath := fmt.Sprintf("%s.tar.gz", supportBundleName) targetFolder := fmt.Sprintf("%s/cluster-resources/", supportBundleName) - cmd := exec.Command("../../../bin/support-bundle", "spec/clusterResources.yaml", "--interactive=false", fmt.Sprintf("-o=%s", supportBundleName)) + cmd := exec.CommandContext(ctx, sbBinary(), "spec/clusterResources.yaml", "--interactive=false", fmt.Sprintf("-o=%s", supportBundleName)) cmd.Stdout = &out err := cmd.Run() if err != nil { diff --git a/test/e2e/support-bundle/goldpinger_collector_e2e_test.go b/test/e2e/support-bundle/goldpinger_collector_e2e_test.go new file mode 100644 index 000000000..3b8d5b2aa --- /dev/null +++ b/test/e2e/support-bundle/goldpinger_collector_e2e_test.go @@ -0,0 +1,95 @@ +package e2e + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "os" + "os/exec" + "path/filepath" + 
"strings" + "testing" + + "github.com/replicatedhq/troubleshoot/internal/testutils" + "github.com/replicatedhq/troubleshoot/pkg/convert" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "sigs.k8s.io/e2e-framework/pkg/envconf" + "sigs.k8s.io/e2e-framework/pkg/features" + "sigs.k8s.io/e2e-framework/third_party/helm" +) + +var specTemplate = ` +apiVersion: troubleshoot.sh/v1beta2 +kind: SupportBundle +metadata: + name: goldpinger +spec: + collectors: + - goldpinger: + namespace: $NAMESPACE + analyzers: + - goldpinger: {} +` + +func Test_GoldpingerCollector(t *testing.T) { + releaseName := "goldpinger" + + feature := features.New("Goldpinger collector and analyser"). + Setup(func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context { + cluster := getClusterFromContext(t, ctx, ClusterName) + manager := helm.New(cluster.GetKubeconfig()) + err := manager.RunInstall( + helm.WithName(releaseName), + helm.WithNamespace(c.Namespace()), + helm.WithChart(testutils.TestFixtureFilePath(t, "charts/goldpinger-6.0.1.tgz")), + helm.WithWait(), + helm.WithTimeout("1m"), + ) + require.NoError(t, err) + return ctx + }). 
+ Assess("collect and analyse goldpinger pings", func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context { + var out bytes.Buffer + + namespace := c.Namespace() + supportBundleName := "goldpinger-test" + spec := strings.ReplaceAll(specTemplate, "$NAMESPACE", namespace) + specPath := filepath.Join(t.TempDir(), "goldpinger.yaml") + + err := os.WriteFile(specPath, []byte(spec), 0644) + require.NoError(t, err) + + tarPath := filepath.Join(t.TempDir(), fmt.Sprintf("%s.tar.gz", supportBundleName)) + cmd := exec.CommandContext(ctx, sbBinary(), specPath, "--interactive=false", "-v=2", fmt.Sprintf("-o=%s", tarPath)) + cmd.Stdout = &out + err = cmd.Run() + t.Log(out.String()) + require.NoError(t, err) + + analysisJSON, err := readFileFromTar(tarPath, fmt.Sprintf("%s/analysis.json", supportBundleName)) + require.NoError(t, err) + + var analysisResults []convert.Result + err = json.Unmarshal(analysisJSON, &analysisResults) + require.NoError(t, err) + + // Check that we analysed collected goldpinger results. + // There won't be any ping results because goldpinger would not have run yet. + // The test is fine since this checks that we query the goldpinger results correctly + // and the analyser is working. + require.Equal(t, 1, len(analysisResults)) + assert.True(t, strings.HasPrefix(analysisResults[0].Name, "missing.ping.results.for.goldpinger.")) + assert.Equal(t, convert.SeverityWarn, analysisResults[0].Severity) + return ctx + }). + Teardown(func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context { + cluster := getClusterFromContext(t, ctx, ClusterName) + manager := helm.New(cluster.GetKubeconfig()) + manager.RunUninstall(helm.WithName(releaseName), helm.WithNamespace(c.Namespace())) + return ctx + }). 
+ Feature() + testenv.Test(t, feature) +} diff --git a/test/e2e/support-bundle/helm_collector_e2e_test.go b/test/e2e/support-bundle/helm_collector_e2e_test.go index 63c77726f..0475a574b 100644 --- a/test/e2e/support-bundle/helm_collector_e2e_test.go +++ b/test/e2e/support-bundle/helm_collector_e2e_test.go @@ -11,7 +11,6 @@ import ( "testing" "sigs.k8s.io/e2e-framework/pkg/envconf" - "sigs.k8s.io/e2e-framework/pkg/envfuncs" "sigs.k8s.io/e2e-framework/pkg/features" "sigs.k8s.io/e2e-framework/third_party/helm" @@ -26,10 +25,7 @@ func Test_HelmCollector(t *testing.T) { feature := features.New("Collector Helm Release"). Setup(func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context { - cluster, ok := envfuncs.GetKindClusterFromContext(ctx, ClusterName) - if !ok { - t.Fatalf("Failed to extract kind cluster %s from context", ClusterName) - } + cluster := getClusterFromContext(t, ctx, ClusterName) manager := helm.New(cluster.GetKubeconfig()) manager.RunInstall(helm.WithName(releaseName), helm.WithNamespace(c.Namespace()), helm.WithChart(filepath.Join(curDir, "testdata/charts/nginx-15.2.0.tgz")), helm.WithWait(), helm.WithTimeout("1m")) //ignore error to allow test to speed up, helm collector will catch the pending or deployed helm release status @@ -43,7 +39,7 @@ func Test_HelmCollector(t *testing.T) { namespace := c.Namespace() tarPath := fmt.Sprintf("%s.tar.gz", supportBundleName) targetFile := fmt.Sprintf("%s/helm/%s.json", supportBundleName, namespace) - cmd := exec.Command("../../../bin/support-bundle", "spec/helm.yaml", "--interactive=false", fmt.Sprintf("-o=%s", supportBundleName)) + cmd := exec.CommandContext(ctx, sbBinary(), "spec/helm.yaml", "--interactive=false", fmt.Sprintf("-o=%s", supportBundleName)) cmd.Stdout = &out err := cmd.Run() if err != nil { @@ -72,6 +68,12 @@ func Test_HelmCollector(t *testing.T) { assert.Equal(t, "nginx", results[0].Chart) return ctx }). 
+ Teardown(func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context { + cluster := getClusterFromContext(t, ctx, ClusterName) + manager := helm.New(cluster.GetKubeconfig()) + manager.RunUninstall(helm.WithName(releaseName), helm.WithNamespace(c.Namespace())) + return ctx + }). Feature() testenv.Test(t, feature) } diff --git a/test/e2e/support-bundle/main_e2e_test.go b/test/e2e/support-bundle/main_e2e_test.go index dff2545fe..b1d96b763 100644 --- a/test/e2e/support-bundle/main_e2e_test.go +++ b/test/e2e/support-bundle/main_e2e_test.go @@ -4,6 +4,7 @@ import ( "archive/tar" "bytes" "compress/gzip" + "context" "fmt" "io" "os" @@ -14,6 +15,7 @@ import ( "sigs.k8s.io/e2e-framework/pkg/env" "sigs.k8s.io/e2e-framework/pkg/envconf" "sigs.k8s.io/e2e-framework/pkg/envfuncs" + "sigs.k8s.io/e2e-framework/support/kind" ) var testenv env.Environment @@ -24,16 +26,29 @@ func TestMain(m *testing.M) { testenv = env.New() namespace := envconf.RandomName("default", 16) testenv.Setup( - envfuncs.CreateKindCluster(ClusterName), + envfuncs.CreateCluster(kind.NewProvider(), ClusterName), envfuncs.CreateNamespace(namespace), ) testenv.Finish( envfuncs.DeleteNamespace(namespace), - envfuncs.DestroyKindCluster(ClusterName), + envfuncs.DestroyCluster(ClusterName), ) os.Exit(testenv.Run(m)) } +func getClusterFromContext(t *testing.T, ctx context.Context, clusterName string) *kind.Cluster { + provider, ok := envfuncs.GetClusterFromContext(ctx, ClusterName) + if !ok { + t.Fatalf("Failed to extract kind cluster %s from context", ClusterName) + } + cluster, ok := provider.(*kind.Cluster) + if !ok { + t.Fatalf("Failed to cast kind cluster %s from provider", ClusterName) + } + + return cluster +} + func readFilesAndFoldersFromTar(tarPath, targetFolder string) ([]string, []string, error) { file, err := os.Open(tarPath) if err != nil { @@ -112,5 +127,9 @@ func readFileFromTar(tarPath, targetFile string) ([]byte, error) { return buf.Bytes(), nil } } - return nil, fmt.Errorf("File not 
found: %s", targetFile) + return nil, fmt.Errorf("File not found: %q", targetFile) } + +func sbBinary() string { + return "../../../bin/support-bundle" +} \ No newline at end of file diff --git a/testdata/charts/goldpinger-6.0.1.tgz b/testdata/charts/goldpinger-6.0.1.tgz new file mode 100644 index 000000000..10e44e5e0 Binary files /dev/null and b/testdata/charts/goldpinger-6.0.1.tgz differ diff --git a/testdata/goldpinger/checkall-success.json b/testdata/goldpinger/checkall-success.json new file mode 100644 index 000000000..831f970e6 --- /dev/null +++ b/testdata/goldpinger/checkall-success.json @@ -0,0 +1,143 @@ +{ + "hosts": [ + { + "hostIP": "10.154.0.7", + "podIP": "10.32.0.11", + "podName": "goldpinger-kpz4g" + }, + { + "hostIP": "10.154.0.6", + "podIP": "10.32.2.2", + "podName": "goldpinger-k6d2j" + }, + { + "hostIP": "10.154.0.8", + "podIP": "10.32.1.2", + "podName": "goldpinger-5ck4d" + } + ], + "responses": { + "goldpinger-5ck4d": { + "HostIP": "10.154.0.8", + "OK": true, + "PodIP": "10.32.1.2", + "response": { + "podResults": { + "goldpinger-5ck4d": { + "HostIP": "10.154.0.8", + "OK": true, + "PingTime": "2023-12-06T18:06:20.161Z", + "PodIP": "10.32.1.2", + "response": { + "boot_time": "2023-12-06T17:05:45.419Z" + }, + "status-code": 200 + }, + "goldpinger-k6d2j": { + "HostIP": "10.154.0.6", + "OK": true, + "PingTime": "2023-12-06T18:06:14.566Z", + "PodIP": "10.32.2.2", + "response": { + "boot_time": "2023-12-06T17:05:51.891Z" + }, + "response-time-ms": 1, + "status-code": 200 + }, + "goldpinger-kpz4g": { + "HostIP": "10.154.0.7", + "OK": true, + "PingTime": "2023-12-06T18:06:23.707Z", + "PodIP": "10.32.0.11", + "response": { + "boot_time": "2023-12-06T16:58:27.431Z" + }, + "response-time-ms": 1, + "status-code": 200 + } + } + } + }, + "goldpinger-k6d2j": { + "HostIP": "10.154.0.6", + "OK": true, + "PodIP": "10.32.2.2", + "response": { + "podResults": { + "goldpinger-5ck4d": { + "HostIP": "10.154.0.8", + "OK": true, + "PingTime": 
"2023-12-06T18:06:25.458Z", + "PodIP": "10.32.1.2", + "response": { + "boot_time": "2023-12-06T17:05:45.419Z" + }, + "status-code": 200 + }, + "goldpinger-k6d2j": { + "HostIP": "10.154.0.6", + "OK": true, + "PingTime": "2023-12-06T18:06:24.382Z", + "PodIP": "10.32.2.2", + "response": { + "boot_time": "2023-12-06T17:05:51.891Z" + }, + "status-code": 200 + }, + "goldpinger-kpz4g": { + "HostIP": "10.154.0.7", + "OK": true, + "PingTime": "2023-12-06T18:06:13.015Z", + "PodIP": "10.32.0.11", + "response": { + "boot_time": "2023-12-06T16:58:27.431Z" + }, + "response-time-ms": 1, + "status-code": 200 + } + } + } + }, + "goldpinger-kpz4g": { + "HostIP": "10.154.0.7", + "OK": true, + "PodIP": "10.32.0.11", + "response": { + "podResults": { + "goldpinger-5ck4d": { + "HostIP": "10.154.0.8", + "OK": true, + "PingTime": "2023-12-06T18:05:59.907Z", + "PodIP": "10.32.1.2", + "response": { + "boot_time": "2023-12-06T17:05:45.419Z" + }, + "response-time-ms": 1, + "status-code": 200 + }, + "goldpinger-k6d2j": { + "HostIP": "10.154.0.6", + "OK": true, + "PingTime": "2023-12-06T18:06:13.706Z", + "PodIP": "10.32.2.2", + "response": { + "boot_time": "2023-12-06T17:05:51.891Z" + }, + "response-time-ms": 1, + "status-code": 200 + }, + "goldpinger-kpz4g": { + "HostIP": "10.154.0.7", + "OK": true, + "PingTime": "2023-12-06T18:05:59.226Z", + "PodIP": "10.32.0.11", + "response": { + "boot_time": "2023-12-06T16:58:27.431Z" + }, + "status-code": 200 + } + } + } + } + } +} \ No newline at end of file diff --git a/testdata/goldpinger/checkall-with-error.json b/testdata/goldpinger/checkall-with-error.json new file mode 100644 index 000000000..83ce0247d --- /dev/null +++ b/testdata/goldpinger/checkall-with-error.json @@ -0,0 +1,132 @@ +{ + "hosts": [ + { + "hostIP": "100.64.0.1", + "podIP": "10.32.0.9", + "podName": "goldpinger-4hctt" + }, + { + "hostIP": "100.64.0.2", + "podIP": "10.32.1.2", + "podName": "goldpinger-tbdsb" + }, + { + "hostIP": "100.64.0.3", + "podIP": "10.32.2.2", + "podName": 
"goldpinger-jj9mw" + } + ], + "responses": { + "goldpinger-4hctt": { + "HostIP": "100.64.0.1", + "OK": true, + "PodIP": "10.32.0.9", + "response": { + "podResults": { + "goldpinger-4hctt": { + "HostIP": "100.64.0.1", + "OK": true, + "PingTime": "2023-12-06T16:45:41.971Z", + "PodIP": "10.32.0.9", + "response": { + "boot_time": "2023-12-06T14:13:58.540Z" + }, + "status-code": 200 + }, + "goldpinger-jj9mw": { + "HostIP": "100.64.0.3", + "OK": false, + "PingTime": "2023-12-06T16:45:22.113Z", + "PodIP": "10.32.2.2", + "error": "Get \"http://10.32.2.2:80/ping\": context deadline exceeded", + "response-time-ms": 300, + "status-code": 504 + }, + "goldpinger-tbdsb": { + "HostIP": "100.64.0.2", + "OK": false, + "PingTime": "2023-12-06T16:45:23.265Z", + "PodIP": "10.32.1.2", + "error": "Get \"http://10.32.1.2:80/ping\": context deadline exceeded", + "response-time-ms": 300, + "status-code": 504 + } + } + } + }, + "goldpinger-jj9mw": { + "HostIP": "100.64.0.3", + "OK": true, + "PodIP": "10.32.2.2", + "response": { + "podResults": { + "goldpinger-4hctt": { + "HostIP": "100.64.0.1", + "OK": false, + "PingTime": "2023-12-06T16:45:38.170Z", + "PodIP": "10.32.0.9", + "error": "Get \"http://10.32.0.9:80/ping\": context deadline exceeded", + "response-time-ms": 300, + "status-code": 504 + }, + "goldpinger-jj9mw": { + "HostIP": "100.64.0.3", + "OK": true, + "PingTime": "2023-12-06T16:45:15.316Z", + "PodIP": "10.32.2.2", + "response": { + "boot_time": "2023-12-06T14:17:08.210Z" + }, + "status-code": 200 + }, + "goldpinger-tbdsb": { + "HostIP": "100.64.0.2", + "OK": false, + "PingTime": "2023-12-06T16:45:15.977Z", + "PodIP": "10.32.1.2", + "error": "Get \"http://10.32.1.2:80/ping\": context deadline exceeded", + "response-time-ms": 300, + "status-code": 504 + } + } + } + }, + "goldpinger-tbdsb": { + "HostIP": "100.64.0.2", + "OK": true, + "PodIP": "10.32.1.2", + "response": { + "podResults": { + "goldpinger-4hctt": { + "HostIP": "100.64.0.1", + "OK": false, + "PingTime": 
"2023-12-06T16:45:14.628Z", + "PodIP": "10.32.0.9", + "error": "Get \"http://10.32.0.9:80/ping\": context deadline exceeded", + "response-time-ms": 300, + "status-code": 504 + }, + "goldpinger-jj9mw": { + "HostIP": "100.64.0.3", + "OK": false, + "PingTime": "2023-12-06T16:45:42.398Z", + "PodIP": "10.32.2.2", + "error": "Get \"http://10.32.2.2:80/ping\": context deadline exceeded", + "response-time-ms": 300, + "status-code": 504 + }, + "goldpinger-tbdsb": { + "HostIP": "100.64.0.2", + "OK": true, + "PingTime": "2023-12-06T16:45:15.259Z", + "PodIP": "10.32.1.2", + "response": { + "boot_time": "2023-12-06T14:17:03.586Z" + }, + "status-code": 200 + } + } + } + } + } +} \ No newline at end of file