Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Maintenance controller #8

Merged
merged 10 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions api/v1alpha1/nodemaintenance_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ const (
ConditionChangedEventType = "ConditionChanged"
)

const (
// MaintenanceFinalizerName is the name of the finalizer being used by NodeMaintenance controllers
MaintenanceFinalizerName = "maintenance.finalizers.nvidia.com"
)

// NodeMaintenanceSpec defines the desired state of NodeMaintenance
type NodeMaintenanceSpec struct {
// RequestorID MUST follow domain name notation format (https://tools.ietf.org/html/rfc1035#section-2.3.1)
Expand Down Expand Up @@ -144,6 +149,8 @@ type NodeMaintenanceStatus struct {
// +listMapKey=type
Conditions []metav1.Condition `json:"conditions,omitempty" patchMergeKey:"type" patchStrategy:"merge" protobuf:"bytes,1,rep,name=conditions"`

// WaitForCompletion is the list of namespaced named pods that we wait to complete
WaitForCompletion []string `json:"waitForCompletion,omitempty"`
// Drain represents the drain status of the node
Drain *DrainStatus `json:"drain,omitempty"`
}
Expand Down
5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 36 additions & 8 deletions cmd/maintenance-manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,24 @@ import (

// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
// to ensure that exec-entrypoint and run can make use of them.
"k8s.io/client-go/kubernetes"
_ "k8s.io/client-go/plugin/pkg/client/auth"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"

maintenancev1alpha1 "github.com/Mellanox/maintenance-operator/api/v1alpha1"
"github.com/Mellanox/maintenance-operator/internal/controller"
"github.com/Mellanox/maintenance-operator/internal/cordon"
operatorlog "github.com/Mellanox/maintenance-operator/internal/log"
"github.com/Mellanox/maintenance-operator/internal/podcompletion"
"github.com/Mellanox/maintenance-operator/internal/scheduler"
"github.com/Mellanox/maintenance-operator/internal/version"
//+kubebuilder:scaffold:imports
Expand Down Expand Up @@ -104,7 +109,8 @@ func main() {
TLSOpts: tlsOpts,
})

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
restConfig := ctrl.GetConfigOrDie()
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
Metrics: metricsserver.Options{
BindAddress: metricsAddr,
Expand Down Expand Up @@ -132,20 +138,30 @@ func main() {
os.Exit(1)
}

k8sInterface, err := kubernetes.NewForConfig(restConfig)
if err != nil {
setupLog.Error(err, "unable to create kubernetes interface")
os.Exit(1)
}

mgrClient := mgr.GetClient()
ykulazhenkov marked this conversation as resolved.
Show resolved Hide resolved

nmrOptions := controller.NewNodeMaintenanceReconcilerOptions()
if err = (&controller.NodeMaintenanceReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
Options: nmrOptions,
}).SetupWithManager(mgr); err != nil {
Client: mgrClient,
Scheme: mgr.GetScheme(),
Options: nmrOptions,
CordonHandler: cordon.NewCordonHandler(mgrClient, k8sInterface),
WaitPodCompletionHandler: podcompletion.NewPodCompletionHandler(mgrClient),
}).SetupWithManager(mgr, ctrl.Log.WithName("NodeMaintenanceReconciler")); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "NodeMaintenance")
os.Exit(1)
}

nmsrOptions := controller.NewNodeMaintenanceSchedulerReconcilerOptions()
nmsrLog := ctrl.Log.WithName("NodeMaintenanceScheduler")
if err = (&controller.NodeMaintenanceSchedulerReconciler{
Client: mgr.GetClient(),
Client: mgrClient,
Scheme: mgr.GetScheme(),
Options: nmsrOptions,
Log: nmsrLog,
Expand All @@ -156,7 +172,7 @@ func main() {
}

if err = (&controller.MaintenanceOperatorConfigReconciler{
Client: mgr.GetClient(),
Client: mgrClient,
Scheme: mgr.GetScheme(),
NodeMaintenanceReconcierOptions: nmrOptions,
SchedulerReconcierOptions: nmsrOptions,
Expand All @@ -175,8 +191,20 @@ func main() {
os.Exit(1)
}

ctx := ctrl.SetupSignalHandler()
// index fields in mgr cache

// pod spec.nodeName used in nodemaintenance controller.
err = mgr.GetCache().IndexField(ctx, &corev1.Pod{}, "spec.nodeName", func(o client.Object) []string {
return []string{o.(*corev1.Pod).Spec.NodeName}
})
if err != nil {
setupLog.Error(err, "failed to index field for cache")
os.Exit(1)
}

setupLog.Info("starting manager")
if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
if err := mgr.Start(ctx); err != nil {
setupLog.Error(err, "problem running manager")
os.Exit(1)
}
Expand Down
6 changes: 6 additions & 0 deletions config/crd/bases/maintenance.nvidia.com_nodemaintenances.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,12 @@ spec:
type: string
type: array
type: object
waitForCompletion:
description: WaitForCompletion is the list of namespaced named pods
that we wait to complete
items:
type: string
type: array
type: object
type: object
served: true
Expand Down
13 changes: 13 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,19 @@ rules:
verbs:
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- maintenance.nvidia.com
Expand Down
34 changes: 31 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,26 @@ require (
github.com/onsi/gomega v1.33.1
github.com/pkg/errors v0.9.1
go.uber.org/zap v1.26.0
k8s.io/api v0.30.2
k8s.io/apimachinery v0.30.2
k8s.io/client-go v0.30.2
k8s.io/api v0.30.3
k8s.io/apimachinery v0.30.3
k8s.io/client-go v0.30.3
k8s.io/kubectl v0.30.3
sigs.k8s.io/controller-runtime v0.18.4
)

require (
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
github.com/MakeNowJust/heredoc v1.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/chai2010/gettext-go v1.0.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-errors/errors v1.4.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
Expand All @@ -31,28 +38,45 @@ require (
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/btree v1.0.1 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
github.com/moby/spdystream v0.2.0 // indirect
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/prometheus/client_golang v1.16.0 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/common v0.44.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/spf13/cobra v1.7.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/xlab/treeprint v1.2.0 // indirect
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/oauth2 v0.12.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/term v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
Expand All @@ -65,10 +89,14 @@ require (
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.30.1 // indirect
k8s.io/cli-runtime v0.30.3 // indirect
k8s.io/component-base v0.30.3 // indirect
k8s.io/klog/v2 v2.120.1 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 // indirect
sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)
Loading
Loading