From e7d891897d91d484c8196014f1bf5af0ddea20ff Mon Sep 17 00:00:00 2001 From: Fred Rolland Date: Tue, 15 Aug 2023 10:36:44 +0300 Subject: [PATCH] Migration from cm to CRs In case the IP configuration ConfigMap exists: - Create IPPools CR according to config spec - Read Nodes ranges annotation - Populate the IPpools Status allocations - Clear Nodes ranges annotation - Delete ConfigMap In case an issue is preventing the migration flow, it can be skipped by setting the env var 'MIGRATOR_DISABLE_MIGRATION'. Signed-off-by: Fred Rolland --- cmd/ipam-controller/app/app.go | 17 ++ deploy/nv-ipam.yaml | 2 + pkg/ipam-controller/migrator/migrator.go | 220 ++++++++++++++++++ .../migrator/migrator_suite_test.go | 82 +++++++ pkg/ipam-controller/migrator/migrator_test.go | 179 ++++++++++++++ 5 files changed, 500 insertions(+) create mode 100644 pkg/ipam-controller/migrator/migrator.go create mode 100644 pkg/ipam-controller/migrator/migrator_suite_test.go create mode 100644 pkg/ipam-controller/migrator/migrator_test.go diff --git a/cmd/ipam-controller/app/app.go b/cmd/ipam-controller/app/app.go index 832a542..125008d 100644 --- a/cmd/ipam-controller/app/app.go +++ b/cmd/ipam-controller/app/app.go @@ -18,6 +18,7 @@ package app import ( "context" "fmt" + "os" "github.com/go-logr/logr" "github.com/spf13/cobra" @@ -30,6 +31,7 @@ import ( "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/cache" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/healthz" @@ -45,6 +47,7 @@ import ( "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-controller/allocator" nodectrl "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-controller/controllers/node" poolctrl "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-controller/controllers/pool" + "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-controller/migrator" "github.com/Mellanox/nvidia-k8s-ipam/pkg/version" ) @@ -134,6 +137,20 @@ func RunController(ctx context.Context, config *rest.Config, opts *options.Optio return err } + directClient, err := client.New(config, + client.Options{Scheme: mgr.GetScheme(), Mapper: mgr.GetRESTMapper()}) + if err != nil { + logger.Error(err, "failed to create direct client") + os.Exit(1) + } + + if err := migrator.Migrate(ctx, directClient, opts.ConfigMapName, + opts.ConfigMapNamespace, opts.PoolsNamespace); err != nil { + logger.Error(err, fmt.Sprintf("failed to migrate NV-IPAM config from ConfigMap, "+ + "set %s env variable to disable migration", migrator.EnvDisableMigration)) + return err + } + netAllocator := allocator.New() nodeEventCH := make(chan event.GenericEvent, 1) diff --git a/deploy/nv-ipam.yaml b/deploy/nv-ipam.yaml index 8e31721..30b3b00 100644 --- a/deploy/nv-ipam.yaml +++ b/deploy/nv-ipam.yaml @@ -147,6 +147,7 @@ rules: - get - list - watch + - delete - apiGroups: - nv-ipam.nvidia.com resources: @@ -155,6 +156,7 @@ rules: - get - list - watch + - create - apiGroups: - nv-ipam.nvidia.com resources: diff --git a/pkg/ipam-controller/migrator/migrator.go b/pkg/ipam-controller/migrator/migrator.go new file mode 100644 index 0000000..8c1b956 --- /dev/null +++ b/pkg/ipam-controller/migrator/migrator.go @@ -0,0 +1,220 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package migrator + +import ( + "context" + "encoding/json" + "fmt" + "net" + "os" + "reflect" + "strings" + + corev1 "k8s.io/api/core/v1" + apiErrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + ipamv1alpha1 "github.com/Mellanox/nvidia-k8s-ipam/api/v1alpha1" + "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-controller/config" + "github.com/Mellanox/nvidia-k8s-ipam/pkg/pool" + + "github.com/go-logr/logr" +) + +const ( + // EnvDisableMigration contains the name of the environment variable which can be used + // to disable migration + EnvDisableMigration = "MIGRATOR_DISABLE_MIGRATION" +) + +func Migrate(ctx context.Context, c client.Client, cmName string, cmNamespace string, poolNamespace string) error { + logger := logr.FromContextOrDiscard(ctx).WithName("migrator") + if os.Getenv(EnvDisableMigration) != "" { + logger.Info(fmt.Sprintf("%s set, skip controller migration", EnvDisableMigration)) + return nil + } + + cfg := &corev1.ConfigMap{} + key := types.NamespacedName{ + Name: cmName, + Namespace: cmNamespace, + } + err := c.Get(ctx, key, cfg) + if err != nil { + if apiErrors.IsNotFound(err) { + logger.Info("ConfigMap not found, skipping migration") + return nil + } + logger.Error(err, "failed to read ConfigMap object") + return err + } + + confData, exist := cfg.Data[config.ConfigMapKey] + if !exist { + logger.Error(nil, fmt.Sprintf("invalid configuration: ConfigMap %s doesn't contain %s key", + key, config.ConfigMapKey)) + return err + } + controllerConfig := &config.Config{} + if err := json.Unmarshal([]byte(confData), controllerConfig); err != nil { + logger.Error(err, fmt.Sprintf("invalid configuration: ConfigMap %s contains invalid JSON", + config.ConfigMapKey)) + return err + } + if err := controllerConfig.Validate(); err != nil { + logger.Error(err, fmt.Sprintf("invalid configuration: ConfigMap %s contains invalid config", + config.ConfigMapKey)) + return err + } + + pools := buildIPPools(controllerConfig, poolNamespace) + + for name, p := range pools { + err = c.Create(ctx, p) + logger.Info(fmt.Sprintf("Creating IPPool: %v", p)) + if apiErrors.IsAlreadyExists(err) { + existingPool := &ipamv1alpha1.IPPool{} + err = c.Get(ctx, client.ObjectKeyFromObject(p), existingPool) + if err != nil { + logger.Info("fail to get existing pool", "pool name", name) + return err + } + if !reflect.DeepEqual(existingPool.Spec, p.Spec) { + logger.Info("existing pool has different spec than config map setting", "pool name", name) + return fmt.Errorf("existing pool has different spec than config map setting") + } + } else if err != nil { + logger.Info("fail to create pool", "pool name", name) + return err + } + } + + err = updateAllocations(ctx, c, logger, pools, poolNamespace) + if err != nil { + return err + } + + err = c.Delete(ctx, &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Namespace: cmNamespace, Name: cmName}}) + if err != nil { + logger.Info("fail to delete nv-ipam config map") + return err + } + + return nil +} + +func updateAllocations(ctx context.Context, c client.Client, + logger logr.Logger, pools map[string]*ipamv1alpha1.IPPool, poolNamespace string) error { + nodeList := &corev1.NodeList{} + if err := c.List(ctx, nodeList); err != nil { + logger.Error(err, "failed to list nodes") + return err + } + for i := range nodeList.Items { + node := nodeList.Items[i] + nodeLog := logger.WithValues("node", node.Name) + poolCfg, err := pool.NewConfigReader(&node) + if err != nil { + nodeLog.Info("skip loading data from the node", "reason", err.Error()) + continue + } + for poolName := range pools { + nodeIPPoolConfig := poolCfg.GetPoolByName(poolName) + if nodeIPPoolConfig == nil { + nodeLog.Info("skip loading data for pool from the node, pool not configured", "node", node.Name, "pool", poolName) + continue + } + p := &ipamv1alpha1.IPPool{} + key := types.NamespacedName{ + Name: poolName, + Namespace: poolNamespace, + } + err := c.Get(ctx, key, p) + if err != nil { + nodeLog.Info("fail getting pool", "reason", err.Error()) + return err + } + if len(p.Status.Allocations) == 0 { + p.Status.Allocations = make([]ipamv1alpha1.Allocation, 0) + } + alloc := ipamv1alpha1.Allocation{ + NodeName: node.Name, + StartIP: nodeIPPoolConfig.StartIP, + EndIP: nodeIPPoolConfig.EndIP, + } + p.Status.Allocations = append(p.Status.Allocations, alloc) + logger.Info(fmt.Sprintf("Updating IPPool status: %v", p)) + err = c.Status().Update(ctx, p) + if err != nil { + nodeLog.Info("fail to update pool allocation from node", "reason", err.Error()) + return err + } + } + fmtKey := strings.ReplaceAll(pool.IPBlocksAnnotation, "/", "~1") + patch := []byte(fmt.Sprintf("[{\"op\": \"remove\", \"path\": \"/metadata/annotations/%s\"}]", fmtKey)) + err = c.Patch(ctx, &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: node.Name, + }, + }, client.RawPatch(types.JSONPatchType, patch)) + if err != nil { + logger.Info("fail to remove IPBlocksAnnotation from node", "name", node.Name, "reason", err.Error()) + return err + } + } + return nil +} + +func buildIPPools(controllerConfig *config.Config, poolNamespace string) map[string]*ipamv1alpha1.IPPool { + var nodeSelector *corev1.NodeSelector + if len(controllerConfig.NodeSelector) > 0 { + nodeSelector = &corev1.NodeSelector{} + selectorsItems := make([]corev1.NodeSelectorTerm, 0, len(controllerConfig.NodeSelector)) + for k, v := range controllerConfig.NodeSelector { + selector := corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{ + { + Key: k, + Operator: corev1.NodeSelectorOpIn, + Values: []string{v}, + }, + }, + MatchFields: nil, + } + selectorsItems = append(selectorsItems, selector) + } + nodeSelector.NodeSelectorTerms = selectorsItems + } + pools := make(map[string]*ipamv1alpha1.IPPool) + for pName, p := range controllerConfig.Pools { + // already validated by Validate function + _, subnet, _ := net.ParseCIDR(p.Subnet) + pools[pName] = &ipamv1alpha1.IPPool{ + ObjectMeta: metav1.ObjectMeta{ + Name: pName, + Namespace: poolNamespace, + }, + Spec: ipamv1alpha1.IPPoolSpec{ + Subnet: subnet.String(), + Gateway: p.Gateway, + PerNodeBlockSize: p.PerNodeBlockSize, + NodeSelector: nodeSelector, + }, + } + } + return pools +} diff --git a/pkg/ipam-controller/migrator/migrator_suite_test.go b/pkg/ipam-controller/migrator/migrator_suite_test.go new file mode 100644 index 0000000..bf77736 --- /dev/null +++ b/pkg/ipam-controller/migrator/migrator_suite_test.go @@ -0,0 +1,82 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package migrator_test + +import ( + "context" + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + + ipamv1alpha1 "github.com/Mellanox/nvidia-k8s-ipam/api/v1alpha1" +) + +const ( + TestNamespace = "test-ns" + TestConfigMapName = "test-config" +) + +var ( + cfg *rest.Config + k8sClient client.Client + testEnv *envtest.Environment + cFunc context.CancelFunc + ctx context.Context +) + +func TestMigrator(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "IPAM Controller Migrator Suite") +} + +var _ = BeforeSuite(func() { + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{"../../../deploy/crds"}, + CRDInstallOptions: envtest.CRDInstallOptions{ + ErrorIfPathMissing: true, + }, + } + + ctx, cFunc = context.WithCancel(context.Background()) + + var err error + err = ipamv1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + // cfg is defined in this file globally. + cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + + Expect(k8sClient.Create(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: TestNamespace}})).To(BeNil()) +}) + +var _ = AfterSuite(func() { + cFunc() + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/pkg/ipam-controller/migrator/migrator_test.go b/pkg/ipam-controller/migrator/migrator_test.go new file mode 100644 index 0000000..3fad0cd --- /dev/null +++ b/pkg/ipam-controller/migrator/migrator_test.go @@ -0,0 +1,179 @@ +/* + Copyright 2023, NVIDIA CORPORATION & AFFILIATES + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package migrator_test + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + apiErrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-controller/config" + "github.com/Mellanox/nvidia-k8s-ipam/pkg/ipam-controller/migrator" + "github.com/Mellanox/nvidia-k8s-ipam/pkg/pool" + + ipamv1alpha1 "github.com/Mellanox/nvidia-k8s-ipam/api/v1alpha1" +) + +const ( + pool1Name = "pool1" + pool2Name = "pool2" +) + +func updateConfigMap(data string) { + d := map[string]string{config.ConfigMapKey: data} + err := k8sClient.Create(ctx, &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: TestConfigMapName, Namespace: TestNamespace}, + Data: d, + }) + if err == nil { + return + } + if apiErrors.IsAlreadyExists(err) { + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get( + ctx, types.NamespacedName{Name: TestConfigMapName, Namespace: TestNamespace}, configMap)).NotTo(HaveOccurred()) + configMap.Data = d + Expect(k8sClient.Update( + ctx, configMap)).NotTo(HaveOccurred()) + } else { + Expect(err).NotTo(HaveOccurred()) + } +} + +var validConfig = fmt.Sprintf(` + { + "pools": { + "%s": { "subnet": "192.168.0.0/16", "perNodeBlockSize": 10 , "gateway": "192.168.0.1"}, + "%s": { "subnet": "172.16.0.0/16", "perNodeBlockSize": 50 , "gateway": "172.16.0.1"} + } + } +`, pool1Name, pool2Name) + +func createNode(name string) *corev1.Node { + node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: name}} + Expect(k8sClient.Create(ctx, node)).NotTo(HaveOccurred()) + return node +} + +func getNode(name string) *corev1.Node { + node := &corev1.Node{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: name}, node)).NotTo(HaveOccurred()) + return node +} + +func updateNode(node *corev1.Node) *corev1.Node { + Expect(k8sClient.Update(ctx, node)).NotTo(HaveOccurred()) + return node +} + +func getRangeFromNode(nodeName string) map[string]*pool.IPPool { + node := getNode(nodeName) + poolCfg, err := pool.NewConfigReader(node) + if err != nil { + return nil + } + return poolCfg.GetPools() +} + +// WaitAndCheckForStability wait for condition and then check it is stable for 1 second +func WaitAndCheckForStability(check func(g Gomega), wait interface{}, stability interface{}) { + Eventually(func(g Gomega) { check(g) }, wait).Should(Succeed()) + Consistently(func(g Gomega) { check(g) }, stability).Should(Succeed()) +} + +var _ = Describe("Controller Migrator", func() { + It("Basic tests", func() { + testNode1 := "node1" + testNode2 := "node2" + + By("Create valid cfg1") + updateConfigMap(validConfig) + + By("Set annotation with valid ranges for node1") + node1 := createNode(testNode1) + node1InitialRanges := map[string]*pool.IPPool{pool1Name: { + Name: pool1Name, + Subnet: "192.168.0.0/16", + StartIP: "192.168.0.11", + EndIP: "192.168.0.20", + Gateway: "192.168.0.1", + }, pool2Name: { + Name: pool2Name, + Subnet: "172.16.0.0/16", + StartIP: "172.16.0.1", + EndIP: "172.16.0.50", + Gateway: "172.16.0.1", + }} + Expect(pool.SetIPBlockAnnotation(node1, node1InitialRanges)).NotTo(HaveOccurred()) + Expect(updateNode(node1)) + + By("Set annotation with valid ranges for node2") + node2 := createNode(testNode2) + node2InitialRanges := map[string]*pool.IPPool{pool1Name: { + Name: pool1Name, + Subnet: "192.168.0.0/16", + StartIP: "192.168.0.21", + EndIP: "192.168.0.30", + Gateway: "192.168.0.1", + }, pool2Name: { + Name: pool2Name, + Subnet: "172.16.0.0/16", + StartIP: "172.16.0.51", + EndIP: "172.16.0.100", + Gateway: "172.16.0.1", + }} + Expect(pool.SetIPBlockAnnotation(node2, node2InitialRanges)).NotTo(HaveOccurred()) + Expect(updateNode(node2)) + + By("Run migrator") + Expect(migrator.Migrate(ctx, k8sClient, TestConfigMapName, TestNamespace, TestNamespace)).NotTo(HaveOccurred()) + + By("Verify Pool1 Spec") + pool1 := &ipamv1alpha1.IPPool{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: TestNamespace, Name: pool1Name}, pool1)).NotTo(HaveOccurred()) + Expect(pool1.Spec.Gateway == "192.168.0.1" && + pool1.Spec.Subnet == "192.168.0.0/16" && pool1.Spec.PerNodeBlockSize == 10).To(BeTrue()) + + By("Verify Pool1 Allocations") + expectedAllocations := []ipamv1alpha1.Allocation{{NodeName: testNode1, StartIP: "192.168.0.11", EndIP: "192.168.0.20"}, + {NodeName: testNode2, StartIP: "192.168.0.21", EndIP: "192.168.0.30"}} + Expect(expectedAllocations).To(BeEquivalentTo(pool1.Status.Allocations)) + + By("Verify Pool2 Spec") + pool2 := &ipamv1alpha1.IPPool{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Namespace: TestNamespace, Name: pool2Name}, pool2)).NotTo(HaveOccurred()) + Expect(pool2.Spec.Gateway == "172.16.0.1" && + pool2.Spec.Subnet == "172.16.0.0/16" && pool2.Spec.PerNodeBlockSize == 50).To(BeTrue()) + + By("Verify Pool2 Allocations") + expectedAllocations = []ipamv1alpha1.Allocation{{NodeName: testNode1, StartIP: "192.168.0.11", EndIP: "192.168.0.20"}, + {NodeName: testNode2, StartIP: "192.168.0.21", EndIP: "192.168.0.30"}} + Expect(expectedAllocations).To(BeEquivalentTo(pool1.Status.Allocations)) + + By("Verify Nodes annotations are removed") + Expect(getRangeFromNode(testNode1)).To(BeEmpty()) + Expect(getRangeFromNode(testNode2)).To(BeEmpty()) + + By("Verify Config Map is deleted") + configMap := &corev1.ConfigMap{} + Expect(k8sClient.Get( + ctx, types.NamespacedName{Name: TestConfigMapName, Namespace: TestNamespace}, configMap)).To(HaveOccurred()) + }) +})