diff --git a/pkg/analyze/velero.go b/pkg/analyze/velero.go index dc9b7bd5f..49763f47d 100644 --- a/pkg/analyze/velero.go +++ b/pkg/analyze/velero.go @@ -47,6 +47,23 @@ func (a *AnalyzeVelero) veleroStatus(analyzer *troubleshootv1beta2.VeleroAnalyze excludeFiles := []string{} + // get backuprepositories.velero.io + backupRepositoriesDir := collect.GetVeleroBackupRepositoriesDirectory(ns) + backupRepositoriesGlob := filepath.Join(backupRepositoriesDir, "*") + backupRepositoriesYaml, err := findFiles(backupRepositoriesGlob, excludeFiles) + if err != nil { + return nil, errors.Wrapf(err, "failed to find velero backup repositories files under %s", backupRepositoriesDir) + } + backupRepositories := []*velerov1.BackupRepository{} + for key, backupRepositoryYaml := range backupRepositoriesYaml { + backupRepository := &velerov1.BackupRepository{} + err := yaml.Unmarshal(backupRepositoryYaml, backupRepository) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal backup repository yaml from %s", key) + } + backupRepositories = append(backupRepositories, backupRepository) + } + // get backups.velero.io backupsDir := collect.GetVeleroBackupsDirectory(ns) backupsGlob := filepath.Join(backupsDir, "*") @@ -65,37 +82,234 @@ func (a *AnalyzeVelero) veleroStatus(analyzer *troubleshootv1beta2.VeleroAnalyze } // fmt.Printf("\n..found %d backups\n", len(backups)) - // get backuprepositories.velero.io - backupRpositoriesDir := collect.GetVeleroBackupRepositoriesDirectory(ns) - backupRepositoriesGlob := filepath.Join(backupRpositoriesDir, "*") - backupRepositoriesYaml, err := findFiles(backupRepositoriesGlob, excludeFiles) + // velerov1.BackupRepositoryTypeRestic + // // get resticrepositories.velero.io + // resticRepositoriesDir := collect.GetVeleroResticRepositoriesDirectory(ns) + // resticRepositoriesGlob := filepath.Join(resticRepositoriesDir, "*") + // resticRepositoriesYaml, err := findFiles(resticRepositoriesGlob, excludeFiles) + // if err != nil { + // return nil, errors.Wrapf(err, "failed to find velero restic repositories files under %s", resticRepositoriesDir) + // } + // resticRepositories := []*velerov1.ResticRepository{} + // for key, resticRepositoryYaml := range resticRepositoriesYaml { + // resticRepository := &velerov1.ResticRepository{} + // err := yaml.Unmarshal(resticRepositoryYaml, resticRepository) + // if err != nil { + // return nil, errors.Wrapf(err, "failed to unmarshal restic repository yaml from %s", key) + // } + // resticRepositories = append(resticRepositories, resticRepository) + // } + + // get backupstoragelocations.velero.io + backupStorageLocationsDir := collect.GetVeleroBackupStorageLocationsDirectory(ns) + backupStorageLocationsGlob := filepath.Join(backupStorageLocationsDir, "*") + backupStorageLocationsYaml, err := findFiles(backupStorageLocationsGlob, excludeFiles) if err != nil { - return nil, errors.Wrapf(err, "failed to find velero backup repositories files under %s", backupRpositoriesDir) + return nil, errors.Wrapf(err, "failed to find velero backup storage locations files under %s", backupStorageLocationsDir) } - backupRepositories := []*velerov1.BackupRepository{} - for key, backupRepositoryYaml := range backupRepositoriesYaml { - backupRepository := &velerov1.BackupRepository{} - err := yaml.Unmarshal(backupRepositoryYaml, backupRepository) + backupStorageLocations := []*velerov1.BackupStorageLocation{} + for key, backupStorageLocationYaml := range backupStorageLocationsYaml { + backupStorageLocation := &velerov1.BackupStorageLocation{} + err := yaml.Unmarshal(backupStorageLocationYaml, backupStorageLocation) if err != nil { - return nil, errors.Wrapf(err, "failed to unmarshal backup repository yaml from %s", key) + return nil, errors.Wrapf(err, "failed to unmarshal backup storage location yaml from %s", key) } - backupRepositories = append(backupRepositories, backupRepository) + backupStorageLocations = append(backupStorageLocations, backupStorageLocation) } - results := []*AnalyzeResult{} + // get deletebackuprequests.velero.io + deleteBackupRequestsDir := collect.GetVeleroDeleteBackupRequestsDirectory(ns) + deleteBackupRequestsGlob := filepath.Join(deleteBackupRequestsDir, "*") + deleteBackupRequestsYaml, err := findFiles(deleteBackupRequestsGlob, excludeFiles) + if err != nil { + return nil, errors.Wrapf(err, "failed to find velero delete backup requests files under %s", deleteBackupRequestsDir) + } + deleteBackupRequests := []*velerov1.DeleteBackupRequest{} + for key, deleteBackupRequestYaml := range deleteBackupRequestsYaml { + deleteBackupRequest := &velerov1.DeleteBackupRequest{} + err := yaml.Unmarshal(deleteBackupRequestYaml, deleteBackupRequest) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal delete backup request yaml from %s", key) + } + deleteBackupRequests = append(deleteBackupRequests, deleteBackupRequest) + } - results = append(results, analyzeBackups(backups)...) + // get downloadrequests.velero.io + // downloadRequestsDir := collect.GetVeleroDownloadRequestsDirectory(ns) + // downloadRequestsGlob := filepath.Join(downloadRequestsDir, "*") + // downloadRequestsYaml, err := findFiles(downloadRequestsGlob, excludeFiles) + // if err != nil { + // return nil, errors.Wrapf(err, "failed to find velero download requests files under %s", downloadRequestsDir) + // } + // downloadRequests := []*velerov1.DownloadRequest{} + // for key, downloadRequestYaml := range downloadRequestsYaml { + // downloadRequest := &velerov1.DownloadRequest{} + // err := yaml.Unmarshal(downloadRequestYaml, downloadRequest) + // if err != nil { + // return nil, errors.Wrapf(err, "failed to unmarshal download request yaml from %s", key) + // } + // downloadRequests = append(downloadRequests, downloadRequest) + // } + + // get podvolumebackups.velero.io + podVolumeBackupsDir := collect.GetVeleroPodVolumeBackupsDirectory(ns) + podVolumeBackupsGlob := filepath.Join(podVolumeBackupsDir, "*") + podVolumeBackupsYaml, err := findFiles(podVolumeBackupsGlob, excludeFiles) + if err != nil { + return nil, errors.Wrapf(err, "failed to find velero pod volume backups files under %s", podVolumeBackupsDir) + } + podVolumeBackups := []*velerov1.PodVolumeBackup{} + for key, podVolumeBackupYaml := range podVolumeBackupsYaml { + podVolumeBackup := &velerov1.PodVolumeBackup{} + err := yaml.Unmarshal(podVolumeBackupYaml, podVolumeBackup) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal pod volume backup yaml from %s", key) + } + podVolumeBackups = append(podVolumeBackups, podVolumeBackup) + } + + // get podvolumerestores.velero.io + podVolumeRestoresDir := collect.GetVeleroPodVolumeRestoresDirectory(ns) + podVolumeRestoresGlob := filepath.Join(podVolumeRestoresDir, "*") + podVolumeRestoresYaml, err := findFiles(podVolumeRestoresGlob, excludeFiles) + if err != nil { + return nil, errors.Wrapf(err, "failed to find velero pod volume restores files under %s", podVolumeRestoresDir) + } + podVolumeRestores := []*velerov1.PodVolumeRestore{} + for key, podVolumeRestoreYaml := range podVolumeRestoresYaml { + podVolumeRestore := &velerov1.PodVolumeRestore{} + err := yaml.Unmarshal(podVolumeRestoreYaml, podVolumeRestore) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal pod volume restore yaml from %s", key) + } + podVolumeRestores = append(podVolumeRestores, podVolumeRestore) + } // get restores.velero.io - // restoresDir := collect.GetVeleroRestoresDirectory(ns) + restoresDir := collect.GetVeleroRestoresDirectory(ns) + restoresGlob := filepath.Join(restoresDir, "*") + restoresYaml, err := findFiles(restoresGlob, excludeFiles) + if err != nil { + return nil, errors.Wrapf(err, "failed to find velero restores files under %s", restoresDir) + } + restores := []*velerov1.Restore{} + for key, restoreYaml := range restoresYaml { + restore := &velerov1.Restore{} + err := yaml.Unmarshal(restoreYaml, restore) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal restore yaml from %s", key) + } + restores = append(restores, restore) + } - // return print backup files found - // return nil, fmt.Errorf("found %d backups, %d backup repositories", len(backups), len(backupRepositories)) + // get schedules.velero.io + schedulesDir := collect.GetVeleroSchedulesDirectory(ns) + schedulesGlob := filepath.Join(schedulesDir, "*") + schedulesYaml, err := findFiles(schedulesGlob, excludeFiles) + if err != nil { + return nil, errors.Wrapf(err, "failed to find velero schedules files under %s", schedulesDir) + } + schedules := []*velerov1.Schedule{} + for key, scheduleYaml := range schedulesYaml { + schedule := &velerov1.Schedule{} + err := yaml.Unmarshal(scheduleYaml, schedule) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal schedule yaml from %s", key) + } + schedules = append(schedules, schedule) + } + + // get serverstatusrequests.velero.io + serverStatusRequestsDir := collect.GetVeleroServerStatusRequestsDirectory(ns) + serverStatusRequestsGlob := filepath.Join(serverStatusRequestsDir, "*") + serverStatusRequestsYaml, err := findFiles(serverStatusRequestsGlob, excludeFiles) + if err != nil { + return nil, errors.Wrapf(err, "failed to find velero server status requests files under %s", serverStatusRequestsDir) + } + serverStatusRequests := []*velerov1.ServerStatusRequest{} + for key, serverStatusRequestYaml := range serverStatusRequestsYaml { + serverStatusRequest := &velerov1.ServerStatusRequest{} + err := yaml.Unmarshal(serverStatusRequestYaml, serverStatusRequest) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal server status request yaml from %s", key) + } + serverStatusRequests = append(serverStatusRequests, serverStatusRequest) + } + + // get volumesnapshotlocations.velero.io + volumeSnapshotLocationsDir := collect.GetVeleroVolumeSnapshotLocationsDirectory(ns) + volumeSnapshotLocationsGlob := filepath.Join(volumeSnapshotLocationsDir, "*") + volumeSnapshotLocationsYaml, err := findFiles(volumeSnapshotLocationsGlob, excludeFiles) + if err != nil { + return nil, errors.Wrapf(err, "failed to find velero volume snapshot locations files under %s", volumeSnapshotLocationsDir) + } + volumeSnapshotLocations := []*velerov1.VolumeSnapshotLocation{} + for key, volumeSnapshotLocationYaml := range volumeSnapshotLocationsYaml { + volumeSnapshotLocation := &velerov1.VolumeSnapshotLocation{} + err := yaml.Unmarshal(volumeSnapshotLocationYaml, volumeSnapshotLocation) + if err != nil { + return nil, errors.Wrapf(err, "failed to unmarshal volume snapshot location yaml from %s", key) + } + volumeSnapshotLocations = append(volumeSnapshotLocations, volumeSnapshotLocation) + } + + results := []*AnalyzeResult{} results = append(results, analyzeBackupRepositories(backupRepositories)...) + results = append(results, analyzeBackups(backups)...) + // results = append(results, analyzeResticRepositories(resticRepositories)...) + results = append(results, analyzeBackupStorageLocations(backupStorageLocations)...) + results = append(results, analyzeDeleteBackupRequests(deleteBackupRequests)...) + // results = append(results, analyzeDownloadRequests(downloadRequests)...) + results = append(results, analyzePodVolumeBackups(podVolumeBackups)...) + results = append(results, analyzePodVolumeRestores(podVolumeRestores)...) + results = append(results, analyzeRestores(restores)...) + results = append(results, analyzeSchedules(schedules)...) + results = append(results, analyzeServerStatusRequests(serverStatusRequests)...) + results = append(results, analyzeVolumeSnapshotLocations(volumeSnapshotLocations)...) return aggregateResults(results), nil } +func analyzeBackupRepositories(backupRepositories []*velerov1.BackupRepository) []*AnalyzeResult { + results := []*AnalyzeResult{} + + // set flag to indicate if at least 1 backup repository is configured and ready + readyCount := 0 + + backupRepositoriesResult := &AnalyzeResult{ + Title: "At least 1 Velero Backup Repository configured", + } + if len(backupRepositories) == 0 { + backupRepositoriesResult.IsFail = true + backupRepositoriesResult.Message = "No backup repositories configured" + } else { + for _, backupRepository := range backupRepositories { + if backupRepository.Status.Phase != velerov1.BackupRepositoryPhaseReady { + result := &AnalyzeResult{ + Title: fmt.Sprintf("Backup Repository %s", backupRepository.Name), + } + result.Message = fmt.Sprintf("Backup Repository [%s] is in phase %s", backupRepository.Name, backupRepository.Status.Phase) + result.IsWarn = true + results = append(results, result) + // result.Strict = false + } else { + readyCount++ + } + } + if readyCount > 0 { + backupRepositoriesResult.IsPass = true + backupRepositoriesResult.Message = fmt.Sprintf("Found %d backup repositories configured and %d Ready", len(backupRepositories), readyCount) + } else { + backupRepositoriesResult.IsWarn = true + backupRepositoriesResult.Message = fmt.Sprintf("Found %d configured backup repositories, but none are ready", len(backupRepositories)) + } + } + results = append(results, backupRepositoriesResult) + + return results + +} + func analyzeBackups(backups []*velerov1.Backup) []*AnalyzeResult { results := []*AnalyzeResult{} @@ -140,50 +354,267 @@ func analyzeBackups(backups []*velerov1.Backup) []*AnalyzeResult { return results } -func analyzeBackupRepositories(backupRepositories []*velerov1.BackupRepository) []*AnalyzeResult { - +func analyzeBackupStorageLocations(backupStorageLocations []*velerov1.BackupStorageLocation) []*AnalyzeResult { results := []*AnalyzeResult{} - - backupRepositoriesResult := &AnalyzeResult{ - Title: "At least 1 Velero Backup Repository configured", + // atleast 1 backup storage location Phase Available + availableCount := 0 + bslResult := &AnalyzeResult{ + Title: "At least 1 Velero Backup Storage Location configured", } - if len(backupRepositories) == 0 { - backupRepositoriesResult.IsFail = true - backupRepositoriesResult.Message = "No backup repositories configured" - } else { - for _, backupRepository := range backupRepositories { - if backupRepository.Status.Phase == velerov1.BackupRepositoryPhaseNotReady { + if len(backupStorageLocations) == 0 { + bslResult.IsFail = true + bslResult.Message = "No backup storage locations configured" + } else { + for _, backupStorageLocation := range backupStorageLocations { + if backupStorageLocation.Status.Phase != velerov1.BackupStorageLocationPhaseAvailable { result := &AnalyzeResult{ - Title: fmt.Sprintf("Backup Repository %s", backupRepository.Name), + Title: fmt.Sprintf("Backup Storage Location %s", backupStorageLocation.Name), } - result.Message = fmt.Sprintf("Backup Repository [%s] is in phase NotReady", backupRepository.Name) + result.Message = fmt.Sprintf("Backup Storage Location [%s] is in phase %s", backupStorageLocation.Name, backupStorageLocation.Status.Phase) result.IsWarn = true results = append(results, result) // result.Strict = false + } else { + availableCount++ } } - backupRepositoriesResult.IsPass = true - backupRepositoriesResult.Message = fmt.Sprintf("Found %d configured backup repositories", len(backupRepositories)) + if availableCount > 0 { + bslResult.IsPass = true + bslResult.Message = fmt.Sprintf("Found %d backup storage locations configured and %d Available", len(backupStorageLocations), availableCount) + } else { + bslResult.IsWarn = true + bslResult.Message = fmt.Sprintf("Found %d configured backup storage locations, but none are available", len(backupStorageLocations)) + } } - results = append(results, backupRepositoriesResult) + results = append(results, bslResult) return results +} + +func analyzeDeleteBackupRequests(deleteBackupRequests []*velerov1.DeleteBackupRequest) []*AnalyzeResult { + results := []*AnalyzeResult{} + // count all in progress, new and processed + inProgressCount := 0 + // processedCount := 0 + // newCount := 0 + if len(deleteBackupRequests) > 0 { + for _, deleteBackupRequest := range deleteBackupRequests { + if deleteBackupRequest.Status.Phase == velerov1.DeleteBackupRequestPhaseInProgress { + inProgressCount++ + } + // else if deleteBackupRequest.Status.Phase == velerov1.DeleteBackupRequestPhaseProcessed { + // processedCount++ + // } else if deleteBackupRequest.Status.Phase == velerov1.DeleteBackupRequestPhaseNew { + // newCount++ + // } + } + if inProgressCount > 0 { + deleteBackupRequestsResult := &AnalyzeResult{ + Title: "Delete Backup Requests summary", + } + deleteBackupRequestsResult.IsWarn = true + deleteBackupRequestsResult.Message = fmt.Sprintf("Found %d delete backup requests in progress", inProgressCount) + results = append(results, deleteBackupRequestsResult) + } + // else if processedCount > 0 { + // deleteBackupRequestsResult.IsPass = true + // deleteBackupRequestsResult.Message = fmt.Sprintf("Found %d delete backup requests processed", processedCount) + // } else if newCount > 0 { + // deleteBackupRequestsResult.IsWarn = true + // deleteBackupRequestsResult.Message = fmt.Sprintf("Found %d delete backup requests new", newCount) + // } + + } + + return results +} + +// func analyzeDownloadRequests(downloadRequests []*velerov1.DownloadRequest) []*AnalyzeResult { +// results := []*AnalyzeResult{} +// // count all +// processedCount := 0 +// newCount := 0 +// if len(downloadRequests) > 0 { +// for _, downloadRequest := range downloadRequests { +// if downloadRequest.Status.Phase == velerov1.DownloadRequestPhaseProcessed { +// processedCount++ +// } else if downloadRequest.Status.Phase == velerov1.DownloadRequestPhaseNew { +// newCount++ +// } +// } +// if processedCount > 0 || newCount > 0 { +// downloadRequestsResult := &AnalyzeResult{ +// Title: "Download Requests summary", +// } +// downloadRequestsResult.IsPass = true +// downloadRequestsResult.Message = fmt.Sprintf("Found %d processed and %d new download requests", processedCount, newCount) +// results = append(results, downloadRequestsResult) +// } +// } + +// return results +// } + +func analyzePodVolumeBackups(podVolumeBackups []*velerov1.PodVolumeBackup) []*AnalyzeResult { + results := []*AnalyzeResult{} + failures := 0 + // isFail if any pod volume backup phase is Failed + if len(podVolumeBackups) > 0 { + // look for PodVolumeBackupPhaseFailed (only 1) + for _, podVolumeBackup := range podVolumeBackups { + if podVolumeBackup.Status.Phase == velerov1.PodVolumeBackupPhaseFailed { + result := &AnalyzeResult{ + Title: fmt.Sprintf("Pod Volume Backup %s", podVolumeBackup.Name), + } + result.IsFail = true + // result.Strict = true + result.Message = fmt.Sprintf("Pod Volume Backup %s phase is %s", podVolumeBackup.Name, podVolumeBackup.Status.Phase) + results = append(results, result) + failures++ + } + } + + if failures == 0 { + results = append(results, &AnalyzeResult{ + Title: "Pod Volume Backups count", + IsPass: true, + Message: fmt.Sprintf("Found %d pod volume backups", len(podVolumeBackups)), + }) + } + } + + return results +} + +func analyzePodVolumeRestores(podVolumeRestores []*velerov1.PodVolumeRestore) []*AnalyzeResult { + results := []*AnalyzeResult{} + // look for velerov1.PodVolumeRestorePhaseFailed + failures := 0 + if len(podVolumeRestores) > 0 { + for _, podVolumeRestore := range podVolumeRestores { + if podVolumeRestore.Status.Phase == velerov1.PodVolumeRestorePhaseFailed { + result := &AnalyzeResult{ + Title: fmt.Sprintf("Pod Volume Restore %s", podVolumeRestore.Name), + } + result.IsFail = true + // result.Strict = true + result.Message = fmt.Sprintf("Pod Volume Restore %s phase is %s", podVolumeRestore.Name, podVolumeRestore.Status.Phase) + results = append(results, result) + failures++ + } + } + if failures == 0 { + results = append(results, &AnalyzeResult{ + Title: "Pod Volume Restores count", + IsPass: true, + Message: fmt.Sprintf("Found %d pod volume restores", len(podVolumeRestores)), + }) + } + } + return results +} + +func analyzeRestores(restores []*velerov1.Restore) []*AnalyzeResult { + results := []*AnalyzeResult{} + failures := 0 + + if len(restores) > 0 { + + failedPhases := map[velerov1.RestorePhase]bool{ + velerov1.RestorePhaseFailed: true, + velerov1.RestorePhasePartiallyFailed: true, + velerov1.RestorePhaseFailedValidation: true, + velerov1.RestorePhaseWaitingForPluginOperationsPartiallyFailed: true, + } + + for _, restore := range restores { + if failedPhases[restore.Status.Phase] { + result := &AnalyzeResult{ + Title: fmt.Sprintf("Restore %s", restore.Name), + } + result.IsFail = true + // result.Strict = true + result.Message = fmt.Sprintf("Restore %s phase is %s", restore.Name, restore.Status.Phase) + results = append(results, result) + failures++ + } + // else if restore.Status.Phase == velerov1.RestorePhaseCompleted { + // result.IsPass = true + // // result.Strict = true + // } else { + // // may indicate phases like: + // // - velerov1.RestorePhaseWaitingForPluginOperations + // // - velerov1.RestorePhaseFinalizing + // result.IsWarn = true + // } + } + if failures == 0 { + results = append(results, &AnalyzeResult{ + Title: "Velero Restores count", + IsPass: true, + Message: fmt.Sprintf("Found %d restores", len(restores)), + }) + } + } + + return results +} + +func analyzeSchedules(schedules []*velerov1.Schedule) []*AnalyzeResult { + results := []*AnalyzeResult{} + // TODO + return results +} + +func analyzeServerStatusRequests(serverStatusRequests []*velerov1.ServerStatusRequest) []*AnalyzeResult { + results := []*AnalyzeResult{} + // TODO + return results +} + +func analyzeVolumeSnapshotLocations(volumeSnapshotLocations []*velerov1.VolumeSnapshotLocation) []*AnalyzeResult { + results := []*AnalyzeResult{} + // fail on velerov1.VolumeSnapshotLocationPhaseUnavailable + failures := 0 + if len(volumeSnapshotLocations) > 0 { + for _, volumeSnapshotLocation := range volumeSnapshotLocations { + if volumeSnapshotLocation.Status.Phase == velerov1.VolumeSnapshotLocationPhaseUnavailable { + result := &AnalyzeResult{ + Title: fmt.Sprintf("Volume Snapshot Location %s", volumeSnapshotLocation.Name), + } + result.IsFail = true + // result.Strict = true + result.Message = fmt.Sprintf("Volume Snapshot Location %s phase is %s", volumeSnapshotLocation.Name, volumeSnapshotLocation.Status.Phase) + results = append(results, result) + failures++ + } + } + if failures == 0 { + results = append(results, &AnalyzeResult{ + Title: "Velero Volume Snapshot Locations count", + IsPass: true, + Message: fmt.Sprintf("Found %d volume snapshot locations", len(volumeSnapshotLocations)), + }) + } + } + + return results } func aggregateResults(results []*AnalyzeResult) []*AnalyzeResult { out := []*AnalyzeResult{} - resultPass := false + resultFailed := false for _, result := range results { - if result.IsPass { - resultPass = true + if result.IsFail { + resultFailed = true // continue } out = append(out, result) } - if resultPass && len(out) == 0 { + if resultFailed == false { out = append(out, &AnalyzeResult{ Title: "Velero Status", IsPass: true,