From fb1b4e0893fe6d1a93548d2c4819fde41e17ee13 Mon Sep 17 00:00:00 2001 From: Archit Sharma Date: Sun, 8 Oct 2023 20:55:17 +0000 Subject: [PATCH] feat: add velero analyzer (#806) Signed-off-by: Archit Sharma --- go.mod | 3 +- go.sum | 6 - pkg/analyze/analyzer.go | 21 ++ pkg/analyze/longhorn.go | 19 -- pkg/analyze/velero.go | 195 ++++++++++++++++++ .../troubleshoot/v1beta2/analyzer_shared.go | 8 + pkg/supportbundle/test/velero.yaml | 2 + 7 files changed, 227 insertions(+), 27 deletions(-) create mode 100644 pkg/analyze/velero.go diff --git a/go.mod b/go.mod index 6ed2195fd..7d1945600 100644 --- a/go.mod +++ b/go.mod @@ -38,7 +38,7 @@ require ( github.com/vmware-tanzu/velero v1.11.1 go.opentelemetry.io/otel v1.18.0 go.opentelemetry.io/otel/sdk v1.18.0 - golang.org/x/exp v0.0.0-20230321023759-10a507213a29 + golang.org/x/exp v0.0.0-20230905200255-921286631fa9 golang.org/x/sync v0.3.0 gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.28.2 @@ -87,7 +87,6 @@ require ( github.com/mistifyio/go-zfs/v3 v3.0.0 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect - github.com/onsi/ginkgo v1.14.0 // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/rubenv/sql-migrate v1.3.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect diff --git a/go.sum b/go.sum index 0ddd8deda..0009e8792 100644 --- a/go.sum +++ b/go.sum @@ -808,8 +808,6 @@ github.com/onsi/ginkgo v1.10.1/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+ github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE= github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= -github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.27.10 h1:naR28SdDFlqrG6kScpT8VWpu1xWY5nJRCF3XaYyBjhI= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= @@ -1148,7 +1146,6 @@ golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/ golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= @@ -1245,14 +1242,12 @@ golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -1267,7 +1262,6 @@ golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/pkg/analyze/analyzer.go b/pkg/analyze/analyzer.go index 0d6f4e681..a95b12a2c 100644 --- a/pkg/analyze/analyzer.go +++ b/pkg/analyze/analyzer.go @@ -1,16 +1,20 @@ package analyzer import ( + "bufio" + "bytes" "context" "encoding/json" "fmt" "reflect" "strconv" + "strings" "github.com/pkg/errors" troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2" "github.com/replicatedhq/troubleshoot/pkg/constants" "github.com/replicatedhq/troubleshoot/pkg/multitype" + "github.com/replicatedhq/troubleshoot/pkg/redact" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" @@ -226,6 +230,8 @@ func getAnalyzer(analyzer *troubleshootv1beta2.Analyze) Analyzer { return &AnalyzeRedis{analyzer: analyzer.Redis} case analyzer.CephStatus != nil: return &AnalyzeCephStatus{analyzer: analyzer.CephStatus} + case analyzer.Velero != nil: + return &AnalyzeVelero{analyzer: analyzer.Velero} case analyzer.Longhorn != nil: return &AnalyzeLonghorn{analyzer: analyzer.Longhorn} case analyzer.RegistryImages != nil: @@ -265,3 +271,18 @@ func DedupAnalyzers(allAnalyzers []*troubleshootv1beta2.Analyze) []*troubleshoot } return finalAnalyzers } + +func stripRedactedLines(yaml []byte) []byte { + buf := bytes.NewBuffer(yaml) + scanner := bufio.NewScanner(buf) + + out := []byte{} + + for scanner.Scan() { + line := strings.ReplaceAll(scanner.Text(), redact.MASK_TEXT, "HIDDEN") + out = append(out, []byte(line)...) + out = append(out, '\n') + } + + return out +} diff --git a/pkg/analyze/longhorn.go b/pkg/analyze/longhorn.go index a9e824452..09e28b2bf 100644 --- a/pkg/analyze/longhorn.go +++ b/pkg/analyze/longhorn.go @@ -1,19 +1,15 @@ package analyzer import ( - "bufio" - "bytes" "fmt" "path/filepath" "reflect" - "strings" "github.com/pkg/errors" troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2" "github.com/replicatedhq/troubleshoot/pkg/collect" longhornv1beta1 "github.com/replicatedhq/troubleshoot/pkg/longhorn/apis/longhorn/v1beta1" longhorntypes "github.com/replicatedhq/troubleshoot/pkg/longhorn/types" - "github.com/replicatedhq/troubleshoot/pkg/redact" "gopkg.in/yaml.v2" ) @@ -241,21 +237,6 @@ func analyzeLonghornEngine(engine *longhornv1beta1.Engine) *AnalyzeResult { return result } -func stripRedactedLines(yaml []byte) []byte { - buf := bytes.NewBuffer(yaml) - scanner := bufio.NewScanner(buf) - - out := []byte{} - - for scanner.Scan() { - line := strings.ReplaceAll(scanner.Text(), redact.MASK_TEXT, "HIDDEN") - out = append(out, []byte(line)...) - out = append(out, '\n') - } - - return out -} - func analyzeLonghornReplicaChecksums(volumeName string, checksums []map[string]string) *AnalyzeResult { result := &AnalyzeResult{ Title: fmt.Sprintf("Longhorn Volume Replica Corruption: %s", volumeName), diff --git a/pkg/analyze/velero.go b/pkg/analyze/velero.go new file mode 100644 index 000000000..dc9b7bd5f --- /dev/null +++ b/pkg/analyze/velero.go @@ -0,0 +1,195 @@ +package analyzer + +import ( + "fmt" + "path/filepath" + + "github.com/pkg/errors" + troubleshootv1beta2 "github.com/replicatedhq/troubleshoot/pkg/apis/troubleshoot/v1beta2" + "github.com/replicatedhq/troubleshoot/pkg/collect" + velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" + "gopkg.in/yaml.v2" +) + +type AnalyzeVelero struct { + analyzer *troubleshootv1beta2.VeleroAnalyze +} + +func (a *AnalyzeVelero) Title() string { + title := a.analyzer.CheckName + if title == "" { + title = "Velero" + } + + return title +} + +func (a *AnalyzeVelero) IsExcluded() (bool, error) { + return isExcluded(a.analyzer.Exclude) +} + +func (a *AnalyzeVelero) Analyze(getFile getCollectedFileContents, findFiles getChildCollectedFileContents) ([]*AnalyzeResult, error) { + results, err := a.veleroStatus(a.analyzer, getFile, findFiles) + if err != nil { + return nil, err + } + for i := range results { + results[i].Strict = a.analyzer.Strict.BoolOrDefaultFalse() + } + return results, nil +} + +func (a *AnalyzeVelero) veleroStatus(analyzer *troubleshootv1beta2.VeleroAnalyze, getFileContents getCollectedFileContents, findFiles getChildCollectedFileContents) ([]*AnalyzeResult, error) { + ns := collect.DefaultVeleroNamespace + if analyzer.Namespace != "" { + ns = analyzer.Namespace + } + + excludeFiles := []string{} + + // get backups.velero.io + backupsDir := collect.GetVeleroBackupsDirectory(ns) + backupsGlob := filepath.Join(backupsDir, "*") + backupsYaml, err := findFiles(backupsGlob, excludeFiles) + if err != nil { + return nil, errors.Wrapf(err, "failed to find velero backups files under %s", backupsDir) + } + backups := []*velerov1.Backup{} + for key, backupYaml := range backupsYaml { + backup := &velerov1.Backup{} + err := yaml.Unmarshal(backupYaml, backup) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal backup yaml from %s", key) + } + backups = append(backups, backup) + } + // fmt.Printf("\n..found %d backups\n", len(backups)) + + // get backuprepositories.velero.io + backupRpositoriesDir := collect.GetVeleroBackupRepositoriesDirectory(ns) + backupRepositoriesGlob := filepath.Join(backupRpositoriesDir, "*") + backupRepositoriesYaml, err := findFiles(backupRepositoriesGlob, excludeFiles) + if err != nil { + return nil, errors.Wrapf(err, "failed to find velero backup repositories files under %s", backupRpositoriesDir) + } + backupRepositories := []*velerov1.BackupRepository{} + for key, backupRepositoryYaml := range backupRepositoriesYaml { + backupRepository := &velerov1.BackupRepository{} + err := yaml.Unmarshal(backupRepositoryYaml, backupRepository) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal backup repository yaml from %s", key) + } + backupRepositories = append(backupRepositories, backupRepository) + } + + results := []*AnalyzeResult{} + + results = append(results, analyzeBackups(backups)...) + + // get restores.velero.io + // restoresDir := collect.GetVeleroRestoresDirectory(ns) + + // return print backup files found + // return nil, fmt.Errorf("found %d backups, %d backup repositories", len(backups), len(backupRepositories)) + results = append(results, analyzeBackupRepositories(backupRepositories)...) + + return aggregateResults(results), nil +} + +func analyzeBackups(backups []*velerov1.Backup) []*AnalyzeResult { + results := []*AnalyzeResult{} + + failedPhases := map[velerov1.BackupPhase]bool{ + velerov1.BackupPhaseFailed: true, + velerov1.BackupPhasePartiallyFailed: true, + velerov1.BackupPhaseFailedValidation: true, + velerov1.BackupPhaseFinalizingPartiallyFailed: true, + velerov1.BackupPhaseWaitingForPluginOperationsPartiallyFailed: true, + } + + for _, backup := range backups { + + if failedPhases[backup.Status.Phase] { + result := &AnalyzeResult{ + Title: fmt.Sprintf("Backup %s", backup.Name), + } + result.IsFail = true + // result.Strict = true + result.Message = fmt.Sprintf("Backup %s phase is %s", backup.Name, backup.Status.Phase) + results = append(results, result) + + } + // else if backup.Status.Phase == velerov1.BackupPhaseCompleted { + // result.IsPass = true + // // result.Strict = true + // } else { + // // may indicate phases like: + // // - velerov1.BackupPhaseWaitingForPluginOperations + // // - velerov1.BackupPhaseFinalizing + // result.IsWarn = true + // } + + } + + results = append(results, &AnalyzeResult{ + Title: "Velero Backups count", + IsPass: true, + Message: fmt.Sprintf("Found %d backups", len(backups)), + }) + + return results +} + +func analyzeBackupRepositories(backupRepositories []*velerov1.BackupRepository) []*AnalyzeResult { + + results := []*AnalyzeResult{} + + backupRepositoriesResult := &AnalyzeResult{ + Title: "At least 1 Velero Backup Repository configured", + } + if len(backupRepositories) == 0 { + backupRepositoriesResult.IsFail = true + backupRepositoriesResult.Message = "No backup repositories configured" + } else { + for _, backupRepository := range backupRepositories { + + if backupRepository.Status.Phase == velerov1.BackupRepositoryPhaseNotReady { + result := &AnalyzeResult{ + Title: fmt.Sprintf("Backup Repository %s", backupRepository.Name), + } + result.Message = fmt.Sprintf("Backup Repository [%s] is in phase NotReady", backupRepository.Name) + result.IsWarn = true + results = append(results, result) + // result.Strict = false + } + } + backupRepositoriesResult.IsPass = true + backupRepositoriesResult.Message = fmt.Sprintf("Found %d configured backup repositories", len(backupRepositories)) + } + results = append(results, backupRepositoriesResult) + + return results + +} + +func aggregateResults(results []*AnalyzeResult) []*AnalyzeResult { + out := []*AnalyzeResult{} + resultPass := false + for _, result := range results { + if result.IsPass { + resultPass = true + // continue + } + out = append(out, result) + } + + if resultPass && len(out) == 0 { + out = append(out, &AnalyzeResult{ + Title: "Velero Status", + IsPass: true, + Message: "Backups and CRDs are healthy", + }) + } + + return out +} diff --git a/pkg/apis/troubleshoot/v1beta2/analyzer_shared.go b/pkg/apis/troubleshoot/v1beta2/analyzer_shared.go index 4cb171d30..a206ad7bd 100644 --- a/pkg/apis/troubleshoot/v1beta2/analyzer_shared.go +++ b/pkg/apis/troubleshoot/v1beta2/analyzer_shared.go @@ -186,6 +186,13 @@ type CephStatusAnalyze struct { Namespace string `json:"namespace" yaml:"namespace"` } +type VeleroAnalyze struct { + AnalyzeMeta `json:",inline" yaml:",inline"` + Outcomes []*Outcome `json:"outcomes" yaml:"outcomes"` + CollectorName string `json:"collectorName,omitempty" yaml:"collectorName,omitempty"` + Namespace string `json:"namespace" yaml:"namespace"` +} + type LonghornAnalyze struct { AnalyzeMeta `json:",inline" yaml:",inline"` Outcomes []*Outcome `json:"outcomes" yaml:"outcomes"` @@ -245,6 +252,7 @@ type Analyze struct { Mysql *DatabaseAnalyze `json:"mysql,omitempty" yaml:"mysql,omitempty"` Redis *DatabaseAnalyze `json:"redis,omitempty" yaml:"redis,omitempty"` CephStatus *CephStatusAnalyze `json:"cephStatus,omitempty" yaml:"cephStatus,omitempty"` + Velero *VeleroAnalyze `json:"velero,omitempty" yaml:"velero,omitempty"` Longhorn *LonghornAnalyze `json:"longhorn,omitempty" yaml:"longhorn,omitempty"` RegistryImages *RegistryImagesAnalyze `json:"registryImages,omitempty" yaml:"registryImages,omitempty"` WeaveReport *WeaveReportAnalyze `json:"weaveReport,omitempty" yaml:"weaveReport,omitempty"` diff --git a/pkg/supportbundle/test/velero.yaml b/pkg/supportbundle/test/velero.yaml index 74551be0f..93c97f568 100644 --- a/pkg/supportbundle/test/velero.yaml +++ b/pkg/supportbundle/test/velero.yaml @@ -5,3 +5,5 @@ metadata: spec: collectors: - velero: {} + analyzers: + - velero: {} \ No newline at end of file