From 8232e07c752eedad98515016f23331d56e480912 Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Tue, 16 Apr 2024 19:04:13 +0000 Subject: [PATCH 1/4] microceph/cmd/microceph: Use microcluster API to remove cluster members Signed-off-by: Max Asnaashari --- microceph/cmd/microceph/cluster_remove.go | 118 +--------------------- 1 file changed, 2 insertions(+), 116 deletions(-) diff --git a/microceph/cmd/microceph/cluster_remove.go b/microceph/cmd/microceph/cluster_remove.go index 55d7bcba..2549584f 100644 --- a/microceph/cmd/microceph/cluster_remove.go +++ b/microceph/cmd/microceph/cluster_remove.go @@ -1,14 +1,10 @@ package main import ( - "fmt" + "context" - "github.com/canonical/lxd/shared/logger" - microCli "github.com/canonical/microcluster/client" "github.com/canonical/microcluster/microcluster" "github.com/spf13/cobra" - - "github.com/canonical/microceph/microceph/client" ) type cmdClusterRemove struct { @@ -45,115 +41,5 @@ func (c *cmdClusterRemove) Run(cmd *cobra.Command, args []string) error { return err } - return removeNode(cli, args[0], c.flagForce) -} - -func removeNode(cli *microCli.Client, node string, force bool) error { - - logger.Debugf("Removing cluster member %v, force: %v", node, force) - - // check prerquisites unless we're forcing - if !force { - err := checkPrerequisites(cli, node) - if err != nil { - return err - } - } - - // delete from ceph - err := deleteNodeServices(cli, node) - if err != nil { - // forcing makes errs non-fatal - if !force { - return err - } - logger.Warnf("Error deleting services from node %v: %v", node, err) - } - - // delete from cluster db - err = client.MClient.DeleteClusterMember(cli, node, force) - logger.Debugf("DeleteClusterMember %v: %v", node, err) - if err != nil { - return err - } - - return nil -} - -func checkPrerequisites(cli *microCli.Client, name string) error { - // check if member exists - clusterMembers, err := client.MClient.GetClusterMembers(cli) - if err != nil { - return fmt.Errorf("Error getting cluster members: %v", err) - } - found := false - for _, member := range clusterMembers { - if member == name { - found = true - } - } - if !found { - return fmt.Errorf("Node %v not found", name) - } - - // check if any OSDs present - disks, err := client.MClient.GetDisks(cli) - if err != nil { - return fmt.Errorf("Error getting disks: %v", err) - } - found = false - for _, disk := range disks { - if disk.Location == name { - found = true - } - } - logger.Debugf("Disks: %v, found: %v", disks, found) - if found { - return fmt.Errorf("Node %v still has disks configured, remove before proceeding", name) - } - - // check if this node has the last mon - services, err := client.MClient.GetServices(cli) - if err != nil { - return fmt.Errorf("Error getting services: %v", err) - } - // create a map of service names counters - // init with false - foundMap := map[string]int{ - "mon": 0, - "mgr": 0, - "mds": 0, - } - // loop through services and check service counts - for _, service := range services { - if service.Location == name { - continue - } - foundMap[service.Service]++ - } - logger.Debugf("Services: %v, foundMap: %v", services, foundMap) - if foundMap["mon"] < 3 || foundMap["mgr"] < 1 || foundMap["mds"] < 1 { - return fmt.Errorf("Need at least 3 mon, 1 mds, and 1 mgr besides %v", name) - } - - return nil -} - -func deleteNodeServices(cli *microCli.Client, name string) error { - services, err := client.MClient.GetServices(cli) - if err != nil { - return err - } - for _, service := range services { - logger.Debugf("Check for deletion: %s", service) - if service.Location == name { - logger.Debugf("Deleting service %s", service) - err = client.MClient.DeleteService(cli, service.Location, service.Service) - if err != nil { - logger.Warnf("Fault deleting service %v on node %v: %v", service.Service, service.Location, err) - } - } - } - return nil - + return cli.DeleteClusterMember(context.Background(), args[0], c.flagForce) } From aadcf529324f1b4a82f4491eed8723ed0255729d Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Tue, 16 Apr 2024 19:04:50 +0000 Subject: [PATCH 2/4] microceph/ceph: Move ceph cleanup into ceph package Signed-off-by: Max Asnaashari --- microceph/ceph/remove.go | 120 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 microceph/ceph/remove.go diff --git a/microceph/ceph/remove.go b/microceph/ceph/remove.go new file mode 100644 index 00000000..7729465e --- /dev/null +++ b/microceph/ceph/remove.go @@ -0,0 +1,120 @@ +package ceph + +import ( + "fmt" + + "github.com/canonical/lxd/shared/logger" + microCli "github.com/canonical/microcluster/client" + + "github.com/canonical/microceph/microceph/client" +) + +func removeNode(cli *microCli.Client, node string, force bool) error { + + logger.Debugf("Removing cluster member %v, force: %v", node, force) + + // check prerquisites unless we're forcing + if !force { + err := checkPrerequisites(cli, node) + if err != nil { + return err + } + } + + // delete from ceph + err := deleteNodeServices(cli, node) + if err != nil { + // forcing makes errs non-fatal + if !force { + return err + } + logger.Warnf("Error deleting services from node %v: %v", node, err) + } + + // delete from cluster db + err = client.MClient.DeleteClusterMember(cli, node, force) + logger.Debugf("DeleteClusterMember %v: %v", node, err) + if err != nil { + return err + } + + return nil +} + +func checkPrerequisites(cli *microCli.Client, name string) error { + // check if member exists + clusterMembers, err := client.MClient.GetClusterMembers(cli) + if err != nil { + return fmt.Errorf("Error getting cluster members: %v", err) + } + found := false + for _, member := range clusterMembers { + if member == name { + found = true + } + } + if !found { + return fmt.Errorf("Node %v not found", name) + } + + // check if any OSDs present + disks, err := client.MClient.GetDisks(cli) + if err != nil { + return fmt.Errorf("Error getting disks: %v", err) + } + found = false + for _, disk := range disks { + if disk.Location == name { + found = true + } + } + logger.Debugf("Disks: %v, found: %v", disks, found) + if found { + return fmt.Errorf("Node %v still has disks configured, remove before proceeding", name) + } + + // check if this node has the last mon + services, err := client.MClient.GetServices(cli) + if err != nil { + return fmt.Errorf("Error getting services: %v", err) + } + // create a map of service names counters + // init with false + foundMap := map[string]int{ + "mon": 0, + "mgr": 0, + "mds": 0, + } + // loop through services and check service counts + for _, service := range services { + if service.Location == name { + continue + } + foundMap[service.Service]++ + } + logger.Debugf("Services: %v, foundMap: %v", services, foundMap) + if foundMap["mon"] < 3 || foundMap["mgr"] < 1 || foundMap["mds"] < 1 { + return fmt.Errorf("Need at least 3 mon, 1 mds, and 1 mgr besides %v", name) + } + + return nil +} + +func deleteNodeServices(cli *microCli.Client, name string) error { + services, err := client.MClient.GetServices(cli) + if err != nil { + return err + } + for _, service := range services { + logger.Debugf("Check for deletion: %s", service) + if service.Location == name { + logger.Debugf("Deleting service %s", service) + err = client.MClient.DeleteService(cli, service.Location, service.Service) + if err != nil { + logger.Warnf("Fault deleting service %v on node %v: %v", service.Service, service.Location, err) + } + } + } + return nil + +} From 25fefb08d1fe2a1db388eeff5d65dac35c16424f Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Tue, 16 Apr 2024 19:07:39 +0000 Subject: [PATCH 3/4] microceph/cmd/microcephd: Add PreRemove hook to the microcluster daemon Instead of directly calling the ceph service cleanup functions in the CLI, this uses microcluster's `PreRemove` hook to clean up the services before removing the node from the cluster. To that end, the `removeNode` function has been wrapped in `PreRemove` and it returns a function with a signature that can be accepted by microcluster.Start as a hook. The `node` field has been replaced with `state.Name` because this function will only run on the node that is being removed, so its name will be recorded in the State struct. Signed-off-by: Max Asnaashari --- microceph/ceph/remove.go | 21 ++++++++++++++------- microceph/cmd/microcephd/main.go | 2 ++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/microceph/ceph/remove.go b/microceph/ceph/remove.go index 7729465e..f1d1e7cb 100644 --- a/microceph/ceph/remove.go +++ b/microceph/ceph/remove.go @@ -5,10 +5,24 @@ import ( "github.com/canonical/lxd/shared/logger" microCli "github.com/canonical/microcluster/client" + "github.com/canonical/microcluster/microcluster" + "github.com/canonical/microcluster/state" "github.com/canonical/microceph/microceph/client" ) +// PreRemove cleans up the underlying ceph services before the node is removed from the dqlite cluster. +func PreRemove(m *microcluster.MicroCluster) func(s *state.State, force bool) error { + return func(s *state.State, force bool) error { + cli, err := m.LocalClient() + if err != nil { + return err + } + + return removeNode(cli, s.Name(), force) + } +} + func removeNode(cli *microCli.Client, node string, force bool) error { logger.Debugf("Removing cluster member %v, force: %v", node, force) @@ -31,13 +45,6 @@ func removeNode(cli *microCli.Client, node string, force bool) error { logger.Warnf("Error deleting services from node %v: %v", node, err) } - // delete from cluster db - err = client.MClient.DeleteClusterMember(cli, node, force) - logger.Debugf("DeleteClusterMember %v: %v", node, err) - if err != nil { - return err - } - return nil } diff --git a/microceph/cmd/microcephd/main.go b/microceph/cmd/microcephd/main.go index cb65a177..39940034 100644 --- a/microceph/cmd/microcephd/main.go +++ b/microceph/cmd/microcephd/main.go @@ -86,6 +86,8 @@ func (c *cmdDaemon) Run(cmd *cobra.Command, args []string) error { return ceph.Start(interf) } + h.PreRemove = ceph.PreRemove(m) + m.AddServers(api.Servers) return m.Start(context.Background(), database.SchemaExtensions, nil, h) } From 9266ea66c608c0901e97722bf3d695ed6444d195 Mon Sep 17 00:00:00 2001 From: Max Asnaashari Date: Tue, 16 Apr 2024 19:45:49 +0000 Subject: [PATCH 4/4] microceph/ceph: Update cluster remove test Moves the removeNode test over to the `ceph` package, and removes the `DeleteClusterMember` expected calls as this function no longer manages that. Signed-off-by: Max Asnaashari --- .../pre_remove_test.go} | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) rename microceph/{cmd/microceph/cluster_remove_test.go => ceph/pre_remove_test.go} (87%) diff --git a/microceph/cmd/microceph/cluster_remove_test.go b/microceph/ceph/pre_remove_test.go similarity index 87% rename from microceph/cmd/microceph/cluster_remove_test.go rename to microceph/ceph/pre_remove_test.go index 4b033a01..9fd925a1 100644 --- a/microceph/cmd/microceph/cluster_remove_test.go +++ b/microceph/ceph/pre_remove_test.go @@ -1,12 +1,13 @@ -package main +package ceph import ( + "testing" + "github.com/canonical/microceph/microceph/api/types" "github.com/canonical/microceph/microceph/client" "github.com/canonical/microceph/microceph/mocks" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" - "testing" "github.com/stretchr/testify/suite" ) @@ -46,7 +47,6 @@ func (s *clusterRemoveSuite) TestRemoveNode() { nil, ) m.On("DeleteService", mock.Anything, "foonode", "mon").Return(nil).Once() - m.On("DeleteClusterMember", mock.Anything, "foonode", false).Return(nil).Once() err := removeNode(nil, "foonode", false) @@ -68,9 +68,6 @@ func (s *clusterRemoveSuite) TestRemoveNodeWithDisks() { err := removeNode(nil, "foonode", false) assert.Error(s.T(), err) - - // assert that we didn't try to delete the node - m.AssertNotCalled(s.T(), "DeleteClusterMember", mock.Anything, "foonode", false) } // TestRemoveNodeLastMon tests that we don't try to delete a node that has the last mon @@ -93,9 +90,6 @@ func (s *clusterRemoveSuite) TestRemoveNodeLastMon() { err := removeNode(nil, "foonode", false) assert.Error(s.T(), err) - - // assert that we didn't try to delete the node - m.AssertNotCalled(s.T(), "DeleteClusterMember", mock.Anything, "foonode", false) } // TestRemoveNodeForce tests that we don't check prerequisites and delete a node if forced @@ -114,7 +108,6 @@ func (s *clusterRemoveSuite) TestRemoveNodeForce() { nil, ) m.On("DeleteService", mock.Anything, "foonode", "mon").Return(nil).Once() - m.On("DeleteClusterMember", mock.Anything, "foonode", true).Return(nil).Once() err := removeNode(nil, "foonode", true)