diff --git a/CHANGELOG/CHANGELOG-1.9.md b/CHANGELOG/CHANGELOG-1.9.md index 0eb08b3bf..a2aa9bfe1 100644 --- a/CHANGELOG/CHANGELOG-1.9.md +++ b/CHANGELOG/CHANGELOG-1.9.md @@ -15,6 +15,7 @@ When cutting a new release, update the `unreleased` heading to the tag being gen ## unreleased +* [ENHANCEMENT] [#1046](https://github.com/k8ssandra/k8ssandra-operator/issues/1046) Add detailed backup information in the MedusaBackup CRD status * [BUGFIX] [#1027](https://github.com/k8ssandra/k8ssandra-operator/issues/1027) Point system-logger image to use the v1.16.0 tag instead of latest * [BUGFIX] [#1026](https://github.com/k8ssandra/k8ssandra-operator/issues/1026) Fix DC name overrides not being properly handled * [BUGFIX] [#981](https://github.com/k8ssandra/k8ssandra-operator/issues/981) Fix race condition in K8ssandraTask status update diff --git a/apis/medusa/v1alpha1/medusabackup_types.go b/apis/medusa/v1alpha1/medusabackup_types.go index adbd2d4a8..e45a62f10 100644 --- a/apis/medusa/v1alpha1/medusabackup_types.go +++ b/apis/medusa/v1alpha1/medusabackup_types.go @@ -37,12 +37,28 @@ type MedusaBackupSpec struct { // MedusaBackupStatus defines the observed state of MedusaBackup type MedusaBackupStatus struct { - StartTime metav1.Time `json:"startTime,omitempty"` - FinishTime metav1.Time `json:"finishTime,omitempty"` + StartTime metav1.Time `json:"startTime,omitempty"` + FinishTime metav1.Time `json:"finishTime,omitempty"` + TotalNodes int32 `json:"totalNodes,omitempty"` + FinishedNodes int32 `json:"finishedNodes,omitempty"` + Nodes []*MedusaBackupNode `json:"nodes,omitempty"` + Status string `json:"status,omitempty"` +} + +type MedusaBackupNode struct { + Host string `json:"host,omitempty"` + Tokens []int64 `json:"tokens,omitempty"` + Datacenter string `json:"datacenter,omitempty"` + Rack string `json:"rack,omitempty"` } //+kubebuilder:object:root=true //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="Started",type=date,JSONPath=".status.startTime",description="Backup start time" +//+kubebuilder:printcolumn:name="Finished",type=date,JSONPath=".status.finishTime",description="Backup finish time" +//+kubebuilder:printcolumn:name="Nodes",type=string,JSONPath=".status.totalNodes",description="Total number of nodes at the time of the backup" +//+kubebuilder:printcolumn:name="Completed",type=string,JSONPath=".status.finishedNodes",description="Number of nodes that completed this backup" +//+kubebuilder:printcolumn:name="Status",type=string,JSONPath=".status.status",description="Backup completion status" // MedusaBackup is the Schema for the medusabackups API type MedusaBackup struct { diff --git a/apis/medusa/v1alpha1/medusabackupjob_types.go b/apis/medusa/v1alpha1/medusabackupjob_types.go index 6c3238080..b74621e46 100644 --- a/apis/medusa/v1alpha1/medusabackupjob_types.go +++ b/apis/medusa/v1alpha1/medusabackupjob_types.go @@ -49,8 +49,10 @@ type MedusaBackupJobStatus struct { Failed []string `json:"failed,omitempty"` } -//+kubebuilder:object:root=true -//+kubebuilder:subresource:status +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Started",type=date,JSONPath=".status.startTime",description="Backup start time" +// +kubebuilder:printcolumn:name="Finished",type=date,JSONPath=".status.finishTime",description="Backup finish time" // MedusaBackupJob is the Schema for the medusabackupjobs API type MedusaBackupJob struct { diff --git a/apis/medusa/v1alpha1/zz_generated.deepcopy.go b/apis/medusa/v1alpha1/zz_generated.deepcopy.go index 
14836fee3..20d88551c 100644 --- a/apis/medusa/v1alpha1/zz_generated.deepcopy.go +++ b/apis/medusa/v1alpha1/zz_generated.deepcopy.go @@ -212,6 +212,26 @@ func (in *MedusaBackupList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MedusaBackupNode) DeepCopyInto(out *MedusaBackupNode) { + *out = *in + if in.Tokens != nil { + in, out := &in.Tokens, &out.Tokens + *out = make([]int64, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MedusaBackupNode. +func (in *MedusaBackupNode) DeepCopy() *MedusaBackupNode { + if in == nil { + return nil + } + out := new(MedusaBackupNode) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MedusaBackupSchedule) DeepCopyInto(out *MedusaBackupSchedule) { *out = *in @@ -324,6 +344,17 @@ func (in *MedusaBackupStatus) DeepCopyInto(out *MedusaBackupStatus) { *out = *in in.StartTime.DeepCopyInto(&out.StartTime) in.FinishTime.DeepCopyInto(&out.FinishTime) + if in.Nodes != nil { + in, out := &in.Nodes, &out.Nodes + *out = make([]*MedusaBackupNode, len(*in)) + for i := range *in { + if (*in)[i] != nil { + in, out := &(*in)[i], &(*out)[i] + *out = new(MedusaBackupNode) + (*in).DeepCopyInto(*out) + } + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MedusaBackupStatus. diff --git a/config/crd/bases/medusa.k8ssandra.io_medusabackupjobs.yaml b/config/crd/bases/medusa.k8ssandra.io_medusabackupjobs.yaml index efd0afa6b..ae1ed8b4b 100644 --- a/config/crd/bases/medusa.k8ssandra.io_medusabackupjobs.yaml +++ b/config/crd/bases/medusa.k8ssandra.io_medusabackupjobs.yaml @@ -15,7 +15,16 @@ spec: singular: medusabackupjob scope: Namespaced versions: - - name: v1alpha1 + - additionalPrinterColumns: + - description: Backup start time + jsonPath: .status.startTime + name: Started + type: date + - description: Backup finish time + jsonPath: .status.finishTime + name: Finished + type: date + name: v1alpha1 schema: openAPIV3Schema: description: MedusaBackupJob is the Schema for the medusabackupjobs API diff --git a/config/crd/bases/medusa.k8ssandra.io_medusabackups.yaml b/config/crd/bases/medusa.k8ssandra.io_medusabackups.yaml index 446d70241..a387fbf62 100644 --- a/config/crd/bases/medusa.k8ssandra.io_medusabackups.yaml +++ b/config/crd/bases/medusa.k8ssandra.io_medusabackups.yaml @@ -15,7 +15,28 @@ spec: singular: medusabackup scope: Namespaced versions: - - name: v1alpha1 + - additionalPrinterColumns: + - description: Backup start time + jsonPath: .status.startTime + name: Started + type: date + - description: Backup finish time + jsonPath: .status.finishTime + name: Finished + type: date + - description: Total number of nodes at the time of the backup + jsonPath: .status.totalNodes + name: Nodes + type: string + - description: Number of nodes that completed this backup + jsonPath: .status.finishedNodes + name: Completed + type: string + - description: Backup completion status + jsonPath: .status.status + name: Status + type: string + name: v1alpha1 schema: openAPIV3Schema: description: MedusaBackup is the Schema for the medusabackups API @@ -54,9 +75,33 @@ spec: finishTime: format: date-time type: string + finishedNodes: + format: int32 + type: integer + nodes: + items: + properties: + datacenter: + type: string + host: + type: 
string + rack: + type: string + tokens: + items: + format: int64 + type: integer + type: array + type: object + type: array startTime: format: date-time type: string + status: + type: string + totalNodes: + format: int32 + type: integer type: object type: object served: true diff --git a/controllers/medusa/medusabackupjob_controller.go b/controllers/medusa/medusabackupjob_controller.go index 15c320709..8310f242b 100644 --- a/controllers/medusa/medusabackupjob_controller.go +++ b/controllers/medusa/medusabackupjob_controller.go @@ -128,7 +128,12 @@ func (r *MedusaBackupJobReconciler) Reconcile(ctx context.Context, req ctrl.Requ logger.Info("backup complete") // The MedusaBackupJob is finished and we now need to create the MedusaBackup object. - if err := r.createMedusaBackup(ctx, backup, logger); err != nil { + backupSummary, err := r.getBackupSummary(ctx, backup, pods, logger) + if err != nil { + logger.Error(err, "Failed to get backup summary") + return ctrl.Result{RequeueAfter: r.DefaultDelay}, err + } + if err := r.createMedusaBackup(ctx, backup, backupSummary, logger); err != nil { logger.Error(err, "Failed to create MedusaBackup") return ctrl.Result{RequeueAfter: r.DefaultDelay}, err } @@ -210,7 +215,25 @@ func (r *MedusaBackupJobReconciler) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{RequeueAfter: r.DefaultDelay}, nil } -func (r *MedusaBackupJobReconciler) createMedusaBackup(ctx context.Context, backup *medusav1alpha1.MedusaBackupJob, logger logr.Logger) error { +func (r *MedusaBackupJobReconciler) getBackupSummary(ctx context.Context, backup *medusav1alpha1.MedusaBackupJob, pods []corev1.Pod, logger logr.Logger) (*medusa.BackupSummary, error) { + for _, pod := range pods { + if remoteBackups, err := GetBackups(ctx, &pod, r.ClientFactory); err != nil { + logger.Error(err, "failed to list backups", "CassandraPod", pod.Name) + return nil, err + } else { + for _, remoteBackup := range remoteBackups { + logger.Info("found backup", "CassandraPod", pod.Name, "Backup", remoteBackup.BackupName) + if backup.ObjectMeta.Name == remoteBackup.BackupName { + return remoteBackup, nil + } + logger.Info("backup name does not match", "CassandraPod", pod.Name, "Backup", remoteBackup.BackupName) + } + } + } + return nil, nil +} + +func (r *MedusaBackupJobReconciler) createMedusaBackup(ctx context.Context, backup *medusav1alpha1.MedusaBackupJob, backupSummary *medusa.BackupSummary, logger logr.Logger) error { // Create a MedusaBackup object after a successful MedusaBackupJob execution. 
logger.Info("Creating MedusaBackup object", "MedusaBackup", backup.Name) backupKey := types.NamespacedName{Namespace: backup.ObjectMeta.Namespace, Name: backup.Name} @@ -239,6 +262,18 @@ func (r *MedusaBackupJobReconciler) createMedusaBackup(ctx context.Context, back backupPatch := client.MergeFrom(backupResource.DeepCopy()) backupResource.Status.StartTime = startTime backupResource.Status.FinishTime = finishTime + backupResource.Status.TotalNodes = backupSummary.TotalNodes + backupResource.Status.FinishedNodes = backupSummary.FinishedNodes + backupResource.Status.Nodes = make([]*medusav1alpha1.MedusaBackupNode, len(backupSummary.Nodes)) + for i, node := range backupSummary.Nodes { + backupResource.Status.Nodes[i] = &medusav1alpha1.MedusaBackupNode{ + Host: node.Host, + Tokens: node.Tokens, + Datacenter: node.Datacenter, + Rack: node.Rack, + } + } + backupResource.Status.Status = backupSummary.Status.String() if err := r.Status().Patch(ctx, backupResource, backupPatch); err != nil { logger.Error(err, "failed to patch status with finish time") return err diff --git a/controllers/medusa/medusabackupjob_controller_test.go b/controllers/medusa/medusabackupjob_controller_test.go index 685819544..8407af46e 100644 --- a/controllers/medusa/medusabackupjob_controller_test.go +++ b/controllers/medusa/medusabackupjob_controller_test.go @@ -235,6 +235,16 @@ func createAndVerifyMedusaBackup(dcKey framework.ClusterKey, dc *cassdcapi.Cassa return !updated.Status.FinishTime.IsZero() && len(updated.Status.Finished) == 3 && len(updated.Status.InProgress) == 0 }, timeout, interval) + t.Log("verify that the MedusaBackup is created") + medusaBackupKey := framework.NewClusterKey(dcKey.K8sContext, dcKey.Namespace, backupName) + medusaBackup := &api.MedusaBackup{} + err = f.Get(ctx, medusaBackupKey, medusaBackup) + require.NoError(err, "failed to get MedusaBackup") + require.Equal(medusaBackup.Status.TotalNodes, dc.Spec.Size, "backup total nodes doesn't match dc nodes") + require.Equal(medusaBackup.Status.FinishedNodes, dc.Spec.Size, "backup finished nodes doesn't match dc nodes") + require.Equal(len(medusaBackup.Status.Nodes), int(dc.Spec.Size), "backup topology doesn't match dc topology") + require.Equal(medusa.StatusType_SUCCESS.String(), medusaBackup.Status.Status, "backup status is not success") + require.Equal(int(dc.Spec.Size), len(medusaClientFactory.GetRequestedBackups(dc.DatacenterName()))) return true @@ -339,10 +349,32 @@ func (c *fakeMedusaClient) GetBackups(ctx context.Context) ([]*medusa.BackupSumm backups := make([]*medusa.BackupSummary, 0) for _, name := range c.RequestedBackups { backup := &medusa.BackupSummary{ - BackupName: name, - StartTime: 0, - FinishTime: 10, - Status: *medusa.StatusType_SUCCESS.Enum(), + BackupName: name, + StartTime: 0, + FinishTime: 10, + TotalNodes: 3, + FinishedNodes: 3, + Status: *medusa.StatusType_SUCCESS.Enum(), + Nodes: []*medusa.BackupNode{ + { + Host: "host1", + Tokens: []int64{1, 2, 3}, + Datacenter: "dc1", + Rack: "rack1", + }, + { + Host: "host2", + Tokens: []int64{1, 2, 3}, + Datacenter: "dc1", + Rack: "rack1", + }, + { + Host: "host3", + Tokens: []int64{1, 2, 3}, + Datacenter: "dc1", + Rack: "rack1", + }, + }, } backups = append(backups, backup) } diff --git a/controllers/medusa/medusatask_controller.go b/controllers/medusa/medusatask_controller.go index 8b3590c51..24279da41 100644 --- a/controllers/medusa/medusatask_controller.go +++ b/controllers/medusa/medusatask_controller.go @@ -265,7 +265,7 @@ func (r *MedusaTaskReconciler) syncOperation(ctx 
context.Context, task *medusav1 } for _, pod := range pods { logger.Info("Listing Backups...", "CassandraPod", pod.Name) - if remoteBackups, err := getBackups(ctx, &pod, r.ClientFactory); err != nil { + if remoteBackups, err := GetBackups(ctx, &pod, r.ClientFactory); err != nil { logger.Error(err, "failed to list backups", "CassandraPod", pod.Name) } else { for _, backup := range remoteBackups { @@ -344,6 +344,19 @@ func createMedusaBackup(logger logr.Logger, backup *medusa.BackupSummary, datace backupPatch := client.MergeFrom(backupResource.DeepCopy()) backupResource.Status.StartTime = startTime backupResource.Status.FinishTime = finishTime + backupResource.Status.TotalNodes = backup.TotalNodes + backupResource.Status.FinishedNodes = backup.FinishedNodes + backupResource.Status.Nodes = make([]*medusav1alpha1.MedusaBackupNode, len(backup.Nodes)) + for i, node := range backup.Nodes { + backupResource.Status.Nodes[i] = &medusav1alpha1.MedusaBackupNode{ + Host: node.Host, + Tokens: node.Tokens, + Datacenter: node.Datacenter, + Rack: node.Rack, + } + } + backupResource.Status.Status = backup.Status.String() + if err := r.Status().Patch(ctx, backupResource, backupPatch); err != nil { logger.Error(err, "failed to patch status with finish time") return true, ctrl.Result{}, err @@ -401,7 +414,7 @@ func prepareRestore(ctx context.Context, task *medusav1alpha1.MedusaTask, pod *c } } -func getBackups(ctx context.Context, pod *corev1.Pod, clientFactory medusa.ClientFactory) ([]*medusa.BackupSummary, error) { +func GetBackups(ctx context.Context, pod *corev1.Pod, clientFactory medusa.ClientFactory) ([]*medusa.BackupSummary, error) { addr := net.JoinHostPort(pod.Status.PodIP, fmt.Sprint(shared.BackupSidecarPort)) if medusaClient, err := clientFactory.NewClient(addr); err != nil { return nil, err diff --git a/docs/content/en/tasks/backup-restore/_index.md b/docs/content/en/tasks/backup-restore/_index.md index 859296d95..940a804b3 100644 --- a/docs/content/en/tasks/backup-restore/_index.md +++ b/docs/content/en/tasks/backup-restore/_index.md @@ -147,8 +147,63 @@ status: ``` +The start and finish times are also displayed in the output of the kubectl get command: + +```sh +% kubectl get MedusaBackupJob -A +NAME STARTED FINISHED +backup1 25m 24m +medusa-backup1 19m 19m +``` + + All pods having completed the backup will be in the `finished` list. At the end of the backup operation, a `MedusaBackup` custom resource will be created with the same name as the `MedusaBackupJob` object. It materializes the backup locally on the Kubernetes cluster. 
+The MedusaBackup object status contains the total number of nodes in the cluster at the time of the backup, the number of nodes that successfully completed the backup, and the topology of the DC at the time of the backup: + +```yaml +apiVersion: medusa.k8ssandra.io/v1alpha1 +kind: MedusaBackup +metadata: + name: backup1 +status: + startTime: '2023-09-13T12:15:57Z' + finishTime: '2023-09-13T12:16:12Z' + totalNodes: 2 + finishedNodes: 2 + nodes: + - datacenter: dc1 + host: firstcluster-dc1-default-sts-0 + rack: default + tokens: + - -110555885826893 + - -1149279817337332700 + - -1222258121654772000 + - -127355705089199870 + - datacenter: dc1 + host: firstcluster-dc1-default-sts-1 + rack: default + tokens: + - -1032268962284829800 + - -1054373523049285200 + - -1058110708807841300 + - -107256661843445790 + status: SUCCESS +spec: + backupType: differential + cassandraDatacenter: dc1 + +``` + +The `kubectl get` output for MedusaBackup objects will show a subset of this information: + +```sh +kubectl get MedusaBackup -A +NAME STARTED FINISHED NODES COMPLETED STATUS +backup1 29m 28m 2 2 SUCCESS +medusa-backup1 23m 23m 2 2 SUCCESS +``` + For a restore to be possible, a `MedusaBackup` object must exist. diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 869a0f946..146f64def 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -127,6 +127,19 @@ func verifyBackupJobFinished(t *testing.T, ctx context.Context, f *framework.E2e t.Logf("backup in progress: %v", updated.Status.InProgress) return !updated.Status.FinishTime.IsZero() && len(updated.Status.InProgress) == 0 }, polling.medusaBackupDone.timeout, polling.medusaBackupDone.interval, "backup didn't finish within timeout") + + dc := &cassdcapi.CassandraDatacenter{} + err := f.Get(ctx, dcKey, dc) + require.NoError(err, "failed to get CassandraDatacenter") + + medusaBackupKey := framework.ClusterKey{K8sContext: dcKey.K8sContext, NamespacedName: types.NamespacedName{Namespace: backupKey.Namespace, Name: backupName}} + medusaBackup := &medusa.MedusaBackup{} + err = f.Get(ctx, medusaBackupKey, medusaBackup) + require.NoError(err, "failed to get MedusaBackup") + require.Equal(dc.Spec.Size, medusaBackup.Status.TotalNodes, "backup total nodes doesn't match dc nodes") + require.Equal(dc.Spec.Size, medusaBackup.Status.FinishedNodes, "backup finished nodes doesn't match dc nodes") + require.Equal(int(dc.Spec.Size), len(medusaBackup.Status.Nodes), "backup topology doesn't match dc topology") + require.Equal(medusapkg.StatusType_SUCCESS.String(), medusaBackup.Status.Status, "backup status is not success") } func restoreBackupJob(t *testing.T, ctx context.Context, namespace string, f *framework.E2eFramework, dcKey framework.ClusterKey) {
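
As an illustration of how the new status fields added by this change could be consumed, here is a minimal Go sketch, not part of the diff above: it assumes the API types are importable as `github.com/k8ssandra/k8ssandra-operator/apis/medusa/v1alpha1` and are accessed through a controller-runtime client; the package name `medusautil` and the helper `backupCoverage` are hypothetical names used only for this example.

```go
package medusautil

import (
	"context"
	"fmt"

	medusav1alpha1 "github.com/k8ssandra/k8ssandra-operator/apis/medusa/v1alpha1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// backupCoverage fetches a MedusaBackup, prints the overall completion figures
// exposed by the new status fields (finishedNodes, totalNodes, status), and
// returns the number of backed-up nodes per datacenter derived from status.nodes.
func backupCoverage(ctx context.Context, c client.Client, namespace, name string) (map[string]int, error) {
	backup := &medusav1alpha1.MedusaBackup{}
	if err := c.Get(ctx, client.ObjectKey{Namespace: namespace, Name: name}, backup); err != nil {
		return nil, err
	}

	fmt.Printf("%s: %d/%d nodes finished, status=%s\n",
		name, backup.Status.FinishedNodes, backup.Status.TotalNodes, backup.Status.Status)

	// Count nodes per datacenter from the recorded backup topology.
	perDC := make(map[string]int)
	for _, node := range backup.Status.Nodes {
		if node != nil {
			perDC[node.Datacenter]++
		}
	}
	return perDC, nil
}
```

A caller could compare `finishedNodes` against `totalNodes` (or the per-datacenter counts against the known DC sizes) to flag backups that did not complete on every node, which is the same check the updated unit and e2e tests perform against `dc.Spec.Size`.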