From d9b2329ed078f9ff5ba150a6a1a96c1f943d8c29 Mon Sep 17 00:00:00 2001 From: Lukasz Zajaczkowski Date: Mon, 30 Sep 2024 15:50:52 +0200 Subject: [PATCH] fix metricsaggregate sporadic errors --- cmd/agent/kubernetes.go | 28 ++++++++++++++++--------- cmd/agent/main.go | 9 +++----- internal/controller/metricsaggregate.go | 10 +++++---- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/cmd/agent/kubernetes.go b/cmd/agent/kubernetes.go index 2917be27..a015bc3e 100644 --- a/cmd/agent/kubernetes.go +++ b/cmd/agent/kubernetes.go @@ -9,6 +9,12 @@ import ( "github.com/argoproj/argo-rollouts/pkg/apis/rollouts" rolloutv1alpha1 "github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1" roclientset "github.com/argoproj/argo-rollouts/pkg/client/clientset/versioned" + "github.com/pluralsh/deployment-operator/cmd/agent/args" + "github.com/pluralsh/deployment-operator/internal/controller" + "github.com/pluralsh/deployment-operator/pkg/cache" + consoleclient "github.com/pluralsh/deployment-operator/pkg/client" + consolectrl "github.com/pluralsh/deployment-operator/pkg/controller" + "github.com/pluralsh/deployment-operator/pkg/controller/service" "github.com/prometheus/client_golang/prometheus/promhttp" velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "k8s.io/apimachinery/pkg/runtime/schema" @@ -16,17 +22,11 @@ import ( "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" + metricsclientset "k8s.io/metrics/pkg/client/clientset/versioned" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/metrics/server" - - "github.com/pluralsh/deployment-operator/cmd/agent/args" - "github.com/pluralsh/deployment-operator/internal/controller" - "github.com/pluralsh/deployment-operator/pkg/cache" - consoleclient "github.com/pluralsh/deployment-operator/pkg/client" - consolectrl "github.com/pluralsh/deployment-operator/pkg/controller" - "github.com/pluralsh/deployment-operator/pkg/controller/service" ) func initKubeManagerOrDie(config *rest.Config) manager.Manager { @@ -64,7 +64,7 @@ func initKubeManagerOrDie(config *rest.Config) manager.Manager { return mgr } -func initKubeClientsOrDie(config *rest.Config) (rolloutsClient *roclientset.Clientset, dynamicClient *dynamic.DynamicClient, kubeClient *kubernetes.Clientset) { +func initKubeClientsOrDie(config *rest.Config) (rolloutsClient *roclientset.Clientset, dynamicClient *dynamic.DynamicClient, kubeClient *kubernetes.Clientset, metricsClient metricsclientset.Interface) { rolloutsClient, err := roclientset.NewForConfig(config) if err != nil { setupLog.Error(err, "unable to create rollouts client") @@ -83,7 +83,13 @@ func initKubeClientsOrDie(config *rest.Config) (rolloutsClient *roclientset.Clie os.Exit(1) } - return rolloutsClient, dynamicClient, kubeClient + metricsClient, err = metricsclientset.NewForConfig(config) + if err != nil { + setupLog.Error(err, "unable to create metrics client") + os.Exit(1) + } + + return rolloutsClient, dynamicClient, kubeClient, metricsClient } func registerKubeReconcilersOrDie( @@ -94,7 +100,8 @@ func registerKubeReconcilersOrDie( extConsoleClient consoleclient.Client, discoveryClient discovery.DiscoveryInterface, ) { - rolloutsClient, dynamicClient, kubeClient := initKubeClientsOrDie(config) + + rolloutsClient, dynamicClient, kubeClient, metricsClient := initKubeClientsOrDie(config) backupController := &controller.BackupReconciler{ Client: manager.GetClient(), @@ -217,6 +224,7 @@ func registerKubeReconcilersOrDie( Client: manager.GetClient(), Scheme: manager.GetScheme(), DiscoveryClient: discoveryClient, + MetricsClient: metricsClient, }).SetupWithManager(ctx, manager); err != nil { setupLog.Error(err, "unable to create controller", "controller", "MetricsAggregate") } diff --git a/cmd/agent/main.go b/cmd/agent/main.go index 7edbac11..3813a338 100644 --- a/cmd/agent/main.go +++ b/cmd/agent/main.go @@ -13,16 +13,14 @@ import ( "github.com/pluralsh/deployment-operator/pkg/cache" "github.com/pluralsh/deployment-operator/pkg/client" consolectrl "github.com/pluralsh/deployment-operator/pkg/controller" - "k8s.io/client-go/discovery" - "k8s.io/client-go/rest" - "k8s.io/klog/v2" - metricsv1beta1 "k8s.io/metrics/pkg/apis/metrics/v1beta1" - velerov1 "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/client-go/discovery" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" ) @@ -39,7 +37,6 @@ func init() { utilruntime.Must(constraintstatusv1beta1.AddToScheme(scheme)) utilruntime.Must(templatesv1.AddToScheme(scheme)) utilruntime.Must(rolloutv1alpha1.AddToScheme(scheme)) - utilruntime.Must(metricsv1beta1.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme } diff --git a/internal/controller/metricsaggregate.go b/internal/controller/metricsaggregate.go index 13fb1054..16e4f680 100644 --- a/internal/controller/metricsaggregate.go +++ b/internal/controller/metricsaggregate.go @@ -14,6 +14,7 @@ import ( "k8s.io/client-go/discovery" metricsapi "k8s.io/metrics/pkg/apis/metrics" "k8s.io/metrics/pkg/apis/metrics/v1beta1" + metricsclientset "k8s.io/metrics/pkg/client/clientset/versioned" ctrl "sigs.k8s.io/controller-runtime" k8sClient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" @@ -34,6 +35,7 @@ type MetricsAggregateReconciler struct { k8sClient.Client Scheme *runtime.Scheme DiscoveryClient discovery.DiscoveryInterface + MetricsClient metricsclientset.Interface } // Reconcile IngressReplica ensure that stays in sync with Kubernetes cluster. @@ -94,8 +96,8 @@ func (r *MetricsAggregateReconciler) Reconcile(ctx context.Context, req ctrl.Req } nodeDeploymentNodesMetrics := make([]v1beta1.NodeMetrics, 0) - allNodeMetricsList := &v1beta1.NodeMetricsList{} - if err := r.List(ctx, allNodeMetricsList); err != nil { + allNodeMetricsList, err := r.MetricsClient.MetricsV1beta1().NodeMetricses().List(ctx, metav1.ListOptions{}) + if err != nil { return reconcile.Result{}, err } @@ -126,12 +128,12 @@ func (r *MetricsAggregateReconciler) Reconcile(ctx context.Context, req ctrl.Req utils.MarkCondition(metrics.SetCondition, v1alpha1.ReadyConditionType, metav1.ConditionTrue, v1alpha1.ReadyConditionReason, "") - return requeue(requeueAfter, jitter), reterr + return requeue(time.Second*5, jitter), reterr } // SetupWithManager sets up the controller with the Manager. func (r *MetricsAggregateReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { - debounceReconciler := NewDebounceReconciler(mgr.GetClient(), debounceDuration, r) + debounceReconciler := NewDebounceReconciler(mgr.GetClient(), time.Second*10, r) debounceReconciler.Start(ctx) return ctrl.NewControllerManagedBy(mgr).