From 770e8628ea452da498c1b6289d8853f4e5502785 Mon Sep 17 00:00:00 2001 From: Francesco Pantano Date: Fri, 20 Sep 2024 09:37:28 +0200 Subject: [PATCH 1/3] Run manila service cleanup when a share is deleted When a share is deleted from the top-level CR, its reference is not deleted, and manila still sees it as Down/not available. For this reason, every time a scale down operation is executed, we need to run a job that is supposed to run a service cleanup, removing everything set as down. Jira: https://issues.redhat.com/browse/OSPRH-6526 Signed-off-by: Francesco Pantano --- api/v1beta1/manila_types.go | 3 +- controllers/manila_controller.go | 47 ++++++++++++++++++++++++++++---- pkg/manila/const.go | 8 +++++- pkg/manila/{dbsync.go => job.go} | 45 +++++++++++++++++------------- 4 files changed, 76 insertions(+), 27 deletions(-) rename pkg/manila/{dbsync.go => job.go} (65%) diff --git a/api/v1beta1/manila_types.go b/api/v1beta1/manila_types.go index ed924f9f..6fb06e8e 100644 --- a/api/v1beta1/manila_types.go +++ b/api/v1beta1/manila_types.go @@ -25,7 +25,8 @@ import ( const ( // DbSyncHash hash DbSyncHash = "dbsync" - + // SvcCleanupHash hash + SvcCleanupHash = "servicecleanup" // DeploymentHash hash used to detect changes DeploymentHash = "deployment" ) diff --git a/controllers/manila_controller.go b/controllers/manila_controller.go index d5205eca..374452d7 100644 --- a/controllers/manila_controller.go +++ b/controllers/manila_controller.go @@ -366,7 +366,13 @@ func (r *ManilaReconciler) reconcileInit( // run manila db sync // dbSyncHash := instance.Status.Hash[manilav1beta1.DbSyncHash] - jobDef := manila.DbSyncJob(instance, serviceLabels, serviceAnnotations) + jobDef := manila.Job( + instance, + serviceLabels, + serviceAnnotations, + manila.DBSyncJobName, + manila.DBSyncCommand, + ) dbSyncjob := job.NewJob( jobDef, manilav1beta1.DbSyncHash, @@ -778,11 +784,38 @@ func (r *ManilaReconciler) reconcileNormal(ctx context.Context, instance *manila instance.Status.Conditions.MarkTrue(condition.CronJobReadyCondition, condition.CronJobReadyMessage) // create CronJob - end - err = r.shareCleanup(ctx, instance) + cleanJob, err := r.shareCleanup(ctx, instance) if err != nil { return ctrl.Result{}, err } + if cleanJob { + jobDef := manila.Job( + instance, + serviceLabels, + serviceAnnotations, + manila.SvcCleanupJobName, + manila.SvcCleanupCommand, + ) + svcCleanupHash := instance.Status.Hash[manilav1beta1.SvcCleanupHash] + shareCleanupJob := job.NewJob( + jobDef, + manilav1beta1.SvcCleanupHash, + instance.Spec.PreserveJobs, + manila.ShortDuration, + svcCleanupHash, + ) + ctrlResult, err := shareCleanupJob.DoJob( + ctx, + helper, + ) + if err != nil { + return ctrl.Result{}, err + } + if (ctrlResult != ctrl.Result{}) { + return ctrlResult, nil + } + } r.Log.Info(fmt.Sprintf("Reconciled Service '%s' successfully", instance.Name)) // update the overall status condition if service is ready if instance.IsReady() { @@ -1216,15 +1249,16 @@ func (r *ManilaReconciler) checkManilaShareGeneration( func (r *ManilaReconciler) shareCleanup( ctx context.Context, instance *manilav1beta1.Manila, -) error { +) (bool, error) { // Generate a list of share CRs shares := &manilav1beta1.ManilaShareList{} + cleanJob := false listOpts := []client.ListOption{ client.InNamespace(instance.Namespace), } if err := r.Client.List(ctx, shares, listOpts...); err != nil { r.Log.Error(err, "Unable to retrieve Manila Share CRs %v") - return nil + return cleanJob, nil } for _, share := range shares.Items { // Skip shares CRs that we don't own @@ -1238,10 +1272,11 @@ func (r *ManilaReconciler) shareCleanup( err := r.Client.Delete(ctx, &share) if err != nil && !k8s_errors.IsNotFound(err) { err = fmt.Errorf("Error cleaning up %s: %w", share.Name, err) - return err + return cleanJob, err } delete(instance.Status.ManilaSharesReadyCounts, share.ShareName()) + cleanJob = true } } - return nil + return cleanJob, nil } diff --git a/pkg/manila/const.go b/pkg/manila/const.go index e8f06f86..ce8b7e62 100644 --- a/pkg/manila/const.go +++ b/pkg/manila/const.go @@ -79,8 +79,14 @@ const ( ShortDuration = time.Duration(5) * time.Second // NormalDuration - NormalDuration = time.Duration(10) * time.Second - //DBSyncCommand - + // DBSyncJobName - + DBSyncJobName = "db-sync" + // DBSyncCommand - DBSyncCommand = "/usr/bin/manila-manage --config-dir /etc/manila/manila.conf.d db sync" + // SvcCleanupJobName - + SvcCleanupJobName = "service-cleanup" + // SvcCleanupCommand - + SvcCleanupCommand = "/usr/bin/manila-manage --config-dir /etc/manila/manila.conf.d service cleanup" ) // DbsyncPropagation keeps track of the DBSync Service Propagation Type diff --git a/pkg/manila/dbsync.go b/pkg/manila/job.go similarity index 65% rename from pkg/manila/dbsync.go rename to pkg/manila/job.go index 6cf2565b..1e8ebd2b 100644 --- a/pkg/manila/dbsync.go +++ b/pkg/manila/job.go @@ -1,6 +1,7 @@ package manila import ( + "fmt" "github.com/openstack-k8s-operators/lib-common/modules/common/env" manilav1 "github.com/openstack-k8s-operators/manila-operator/api/v1beta1" batchv1 "k8s.io/api/batch/v1" @@ -8,18 +9,23 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// DbSyncJob func -func DbSyncJob(instance *manilav1.Manila, labels map[string]string, annotations map[string]string) *batchv1.Job { +// ManilaJob func +func Job( + instance *manilav1.Manila, + labels map[string]string, + annotations map[string]string, + jobName string, + jobCommand string, +) *batchv1.Job { var config0644AccessMode int32 = 0644 - - // Unlike the individual manila services, the DbSyncJob doesn't need a - // secret that contains all of the config snippets required by every - // service, The two snippet files that it does need (DefaultsConfigFileName - // and CustomConfigFileName) can be extracted from the top-level manila - // config-data secret. - dbSyncVolume := []corev1.Volume{ + // Unlike the individual manila services, DbSyncJob or a Job executing a + // manila-manage command doesn't need a secret that contains all of the + // config snippets required by every service, The two snippet files that it + // does need (DefaultsConfigFileName and CustomConfigFileName) can be + // extracted from the top-level manila config-data secret. + manilaJobVolume := []corev1.Volume{ { - Name: "db-sync-config-data", + Name: "job-config-data", VolumeSource: corev1.VolumeSource{ Secret: &corev1.SecretVolumeSource{ DefaultMode: &config0644AccessMode, @@ -48,9 +54,9 @@ func DbSyncJob(instance *manilav1.Manila, labels map[string]string, annotations }, } - dbSyncMounts := []corev1.VolumeMount{ + manilaJobMounts := []corev1.VolumeMount{ { - Name: "db-sync-config-data", + Name: "job-config-data", MountPath: "/etc/manila/manila.conf.d", ReadOnly: true, }, @@ -62,12 +68,12 @@ func DbSyncJob(instance *manilav1.Manila, labels map[string]string, annotations }, } - args := []string{"-c", DBSyncCommand} + args := []string{"-c", jobCommand} // add CA cert if defined if instance.Spec.ManilaAPI.TLS.CaBundleSecretName != "" { - dbSyncVolume = append(dbSyncVolume, instance.Spec.ManilaAPI.TLS.CreateVolume()) - dbSyncMounts = append(dbSyncMounts, instance.Spec.ManilaAPI.TLS.CreateVolumeMounts(nil)...) + manilaJobVolume = append(manilaJobVolume, instance.Spec.ManilaAPI.TLS.CreateVolume()) + manilaJobMounts = append(manilaJobMounts, instance.Spec.ManilaAPI.TLS.CreateVolumeMounts(nil)...) } envVars := map[string]env.Setter{} @@ -76,7 +82,8 @@ func DbSyncJob(instance *manilav1.Manila, labels map[string]string, annotations job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - Name: instance.Name + "-db-sync", + //Name: instance.Name + "-db-sync", + Name: fmt.Sprintf("%s-%s", instance.Name, jobName), Namespace: instance.Namespace, Labels: labels, }, @@ -90,7 +97,7 @@ func DbSyncJob(instance *manilav1.Manila, labels map[string]string, annotations ServiceAccountName: instance.RbacResourceName(), Containers: []corev1.Container{ { - Name: instance.Name + "-db-sync", + Name: fmt.Sprintf("%s-%s", instance.Name, jobName), Command: []string{ "/bin/bash", }, @@ -98,10 +105,10 @@ func DbSyncJob(instance *manilav1.Manila, labels map[string]string, annotations Image: instance.Spec.ManilaAPI.ContainerImage, SecurityContext: manilaDefaultSecurityContext(), Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: dbSyncMounts, + VolumeMounts: manilaJobMounts, }, }, - Volumes: dbSyncVolume, + Volumes: manilaJobVolume, }, }, }, From 23b53012bf68715c1cc456062bec0d8787551c7a Mon Sep 17 00:00:00 2001 From: Francesco Pantano Date: Fri, 20 Sep 2024 21:08:36 +0200 Subject: [PATCH 2/3] Run a cleanup Job when ManilaShare is scaled down When a backend config is removed, the "service" database entry in manila needs to be adjusted to account for the removal. This patch introduces a Job that runs a manila-manage command every time a share is scaled down. The main job definition is now more flexible and we can pass a TTL in case we want to customize the time the job is kept. Signed-off-by: Francesco Pantano --- controllers/manila_controller.go | 35 ++++++++++++++++++-------------- pkg/manila/const.go | 4 ++++ pkg/manila/funcs.go | 11 ++++++++++ pkg/manila/job.go | 9 +++++--- 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/controllers/manila_controller.go b/controllers/manila_controller.go index 374452d7..6cd3fca3 100644 --- a/controllers/manila_controller.go +++ b/controllers/manila_controller.go @@ -21,6 +21,7 @@ import ( "fmt" "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" "github.com/go-logr/logr" memcachedv1 "github.com/openstack-k8s-operators/infra-operator/apis/memcached/v1beta1" @@ -370,6 +371,7 @@ func (r *ManilaReconciler) reconcileInit( instance, serviceLabels, serviceAnnotations, + nil, manila.DBSyncJobName, manila.DBSyncCommand, ) @@ -784,7 +786,7 @@ func (r *ManilaReconciler) reconcileNormal(ctx context.Context, instance *manila instance.Status.Conditions.MarkTrue(condition.CronJobReadyCondition, condition.CronJobReadyMessage) // create CronJob - end - cleanJob, err := r.shareCleanup(ctx, instance) + cleanJob, hash, err := r.shareCleanup(ctx, instance) if err != nil { return ctrl.Result{}, err } @@ -794,26 +796,23 @@ func (r *ManilaReconciler) reconcileNormal(ctx context.Context, instance *manila instance, serviceLabels, serviceAnnotations, - manila.SvcCleanupJobName, + ptr.To(manila.TTL), + fmt.Sprintf("%s-%s", manila.SvcCleanupJobName, hash[:manila.TruncateHash]), manila.SvcCleanupCommand, ) - svcCleanupHash := instance.Status.Hash[manilav1beta1.SvcCleanupHash] shareCleanupJob := job.NewJob( jobDef, manilav1beta1.SvcCleanupHash, - instance.Spec.PreserveJobs, + false, manila.ShortDuration, - svcCleanupHash, + "", ) ctrlResult, err := shareCleanupJob.DoJob( ctx, helper, ) if err != nil { - return ctrl.Result{}, err - } - if (ctrlResult != ctrl.Result{}) { - return ctrlResult, nil + return ctrlResult, err } } r.Log.Info(fmt.Sprintf("Reconciled Service '%s' successfully", instance.Name)) @@ -1249,34 +1248,40 @@ func (r *ManilaReconciler) checkManilaShareGeneration( func (r *ManilaReconciler) shareCleanup( ctx context.Context, instance *manilav1beta1.Manila, -) (bool, error) { +) (bool, string, error) { + cleanJob := false + var deletedShares = []string{} // Generate a list of share CRs shares := &manilav1beta1.ManilaShareList{} - cleanJob := false + listOpts := []client.ListOption{ client.InNamespace(instance.Namespace), } if err := r.Client.List(ctx, shares, listOpts...); err != nil { r.Log.Error(err, "Unable to retrieve Manila Share CRs %v") - return cleanJob, nil + return cleanJob, "", nil } for _, share := range shares.Items { // Skip shares CRs that we don't own if manila.GetOwningManilaName(&share) != instance.Name { continue } - // Delete the manilaShare if it's no longer in the spec _, exists := instance.Spec.ManilaShares[share.ShareName()] if !exists && share.DeletionTimestamp.IsZero() { err := r.Client.Delete(ctx, &share) if err != nil && !k8s_errors.IsNotFound(err) { err = fmt.Errorf("Error cleaning up %s: %w", share.Name, err) - return cleanJob, err + return cleanJob, "", err } delete(instance.Status.ManilaSharesReadyCounts, share.ShareName()) + deletedShares = append(deletedShares, share.ShareName()) cleanJob = true } } - return cleanJob, nil + hash, err := manila.SharesListHash(deletedShares) + if err != nil { + return false, hash, err + } + return cleanJob, hash, nil } diff --git a/pkg/manila/const.go b/pkg/manila/const.go index ce8b7e62..92cdd05b 100644 --- a/pkg/manila/const.go +++ b/pkg/manila/const.go @@ -87,6 +87,10 @@ const ( SvcCleanupJobName = "service-cleanup" // SvcCleanupCommand - SvcCleanupCommand = "/usr/bin/manila-manage --config-dir /etc/manila/manila.conf.d service cleanup" + // TruncateHash - + TruncateHash int = 8 + // TTL - + TTL int32 = 5 * 60 // 5 minutes ) // DbsyncPropagation keeps track of the DBSync Service Propagation Type diff --git a/pkg/manila/funcs.go b/pkg/manila/funcs.go index eb4b1881..735e9c2b 100644 --- a/pkg/manila/funcs.go +++ b/pkg/manila/funcs.go @@ -3,6 +3,7 @@ package manila import ( common "github.com/openstack-k8s-operators/lib-common/modules/common" "github.com/openstack-k8s-operators/lib-common/modules/common/affinity" + "github.com/openstack-k8s-operators/lib-common/modules/common/util" corev1 "k8s.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -55,3 +56,13 @@ func GetPodAffinity(componentName string) *corev1.Affinity { corev1.LabelHostname, ) } + +// SharesListHash - Given a list of share names passed as an array of strings, +// it returns an hash that is currently used to build the job name +func SharesListHash(shareNames []string) (string, error) { + hash, err := util.ObjectHash(shareNames) + if err != nil { + return hash, err + } + return hash, err +} diff --git a/pkg/manila/job.go b/pkg/manila/job.go index 1e8ebd2b..aca2273b 100644 --- a/pkg/manila/job.go +++ b/pkg/manila/job.go @@ -9,11 +9,12 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// ManilaJob func +// Job func func Job( instance *manilav1.Manila, labels map[string]string, annotations map[string]string, + ttl *int32, jobName string, jobCommand string, ) *batchv1.Job { @@ -82,7 +83,6 @@ func Job( job := &batchv1.Job{ ObjectMeta: metav1.ObjectMeta{ - //Name: instance.Name + "-db-sync", Name: fmt.Sprintf("%s-%s", instance.Name, jobName), Namespace: instance.Namespace, Labels: labels, @@ -113,6 +113,9 @@ func Job( }, }, } - + if ttl != nil { + // Setting TTL to delete the job after it has completed + job.Spec.TTLSecondsAfterFinished = ttl + } return job } From 7375047ed4947cd34e7314186de743d3ad085c8e Mon Sep 17 00:00:00 2001 From: Francesco Pantano Date: Fri, 4 Oct 2024 14:23:02 +0200 Subject: [PATCH 3/3] Delay jon when a share is deleted If the share cleanup job is executed right after the share deletion, it might be possible that the other manila components have not yet been notified about the event. The result is that the job has no effect. This patch introduces a parameter for the ManilaJob that is supposed to delay the command. A constant has been defined to properly indentify the delay time associated with the service cleanup. Signed-off-by: Francesco Pantano --- controllers/manila_controller.go | 2 ++ pkg/manila/const.go | 5 +++-- pkg/manila/job.go | 4 +++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/controllers/manila_controller.go b/controllers/manila_controller.go index 6cd3fca3..71ad536f 100644 --- a/controllers/manila_controller.go +++ b/controllers/manila_controller.go @@ -374,6 +374,7 @@ func (r *ManilaReconciler) reconcileInit( nil, manila.DBSyncJobName, manila.DBSyncCommand, + 0, // no need to delay dbSync ) dbSyncjob := job.NewJob( jobDef, @@ -799,6 +800,7 @@ func (r *ManilaReconciler) reconcileNormal(ctx context.Context, instance *manila ptr.To(manila.TTL), fmt.Sprintf("%s-%s", manila.SvcCleanupJobName, hash[:manila.TruncateHash]), manila.SvcCleanupCommand, + manila.ManilaServiceCleanupDelay, ) shareCleanupJob := job.NewJob( jobDef, diff --git a/pkg/manila/const.go b/pkg/manila/const.go index 92cdd05b..560da1a7 100644 --- a/pkg/manila/const.go +++ b/pkg/manila/const.go @@ -43,12 +43,13 @@ const ( ManilaUserID int64 = 42429 // ManilaGroupID - ManilaGroupID int64 = 42429 - // ManilaPublicPort - ManilaPublicPort int32 = 8786 // ManilaInternalPort - ManilaInternalPort int32 = 8786 - + // ManilaServiceCleanupDelay - Time in seconds that the ServiceCleanupJob + // needs to wait before executing the manila-manage command + ManilaServiceCleanupDelay int32 = 120 // ManilaExtraVolTypeUndefined can be used to label an extraMount which // is not associated with a specific backend ManilaExtraVolTypeUndefined storage.ExtraVolType = "Undefined" diff --git a/pkg/manila/job.go b/pkg/manila/job.go index aca2273b..51da78b1 100644 --- a/pkg/manila/job.go +++ b/pkg/manila/job.go @@ -17,6 +17,7 @@ func Job( ttl *int32, jobName string, jobCommand string, + delay int32, ) *batchv1.Job { var config0644AccessMode int32 = 0644 // Unlike the individual manila services, DbSyncJob or a Job executing a @@ -69,7 +70,8 @@ func Job( }, } - args := []string{"-c", jobCommand} + delayCommand := fmt.Sprintf("sleep %d", delay) + args := []string{"-c", fmt.Sprintf("%s && %s", delayCommand, jobCommand)} // add CA cert if defined if instance.Spec.ManilaAPI.TLS.CaBundleSecretName != "" {