diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index e8b789d..f662f2c 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -95,11 +95,6 @@ func NewDaemon() (Daemon, error) { return nil, err } - guidPool, err := guid.NewPool(&daemonConfig.GUIDPool) - if err != nil { - return nil, err - } - pluginLoader := sm.NewPluginLoader() getSmClientFunc, err := pluginLoader.LoadPlugin(path.Join( daemonConfig.PluginPath, daemonConfig.Plugin+".so"), sm.InitializePluginFunc) @@ -125,6 +120,14 @@ func NewDaemon() (Daemon, error) { return nil, validateErr } + guidPool, err := guid.NewPool(&daemonConfig.GUIDPool) + if err != nil { + return nil, err + } + + // Reset guid pool with already allocated guids to avoid collisions + err = syncGuidPool(smClient, guidPool) + podWatcher := watcher.NewWatcher(podEventHandler, client) return &daemon{ config: daemonConfig, @@ -256,7 +259,16 @@ func (d *daemon) processNetworkGUID(networkID string, spec *utils.IbSriovCniSpec } else { guidAddr, err = d.guidPool.GenerateGUID() if err != nil { - return fmt.Errorf("failed to generate GUID for pod ID %s, with error: %v", pi.pod.UID, err) + switch err { + // If the guid pool is exhausted, need to sync with SM in case there are unsynced changes + case guid.GuidPoolExhaustedError: + err = syncGuidPool(d.smClient, d.guidPool) + if err != nil { + return err + } + default: + return fmt.Errorf("failed to generate GUID for pod ID %s, with error: %v", pi.pod.UID, err) + } } allocatedGUID = guidAddr.String() @@ -284,6 +296,20 @@ func (d *daemon) processNetworkGUID(networkID string, spec *utils.IbSriovCniSpec return nil } +func syncGuidPool(smClient plugins.SubnetManagerClient, guidPool guid.Pool) error { + usedGuids, err := smClient.ListGuidsInUse() + if err != nil { + return err + } + + // Reset guid pool with already allocated guids to avoid collisions + err = guidPool.Reset(usedGuids) + if err != nil { + return err + } + return nil +} + // Update and set Pod's network annotation. // If failed to update annotation, pod's GUID added into the list to be removed from Pkey. func (d *daemon) updatePodNetworkAnnotation(pi *podNetworkInfo, removedList *[]net.HardwareAddr) error { @@ -537,7 +563,7 @@ func (d *daemon) DeletePeriodicUpdate() { log.Info().Msg("delete periodic update finished") } -// initPool check the guids that are already allocated by the running pods +// initPool check the guids that are already allocated by the running pods func (d *daemon) initPool() error { log.Info().Msg("Initializing GUID pool.") diff --git a/pkg/guid/guid_pool.go b/pkg/guid/guid_pool.go index 757e476..e425748 100644 --- a/pkg/guid/guid_pool.go +++ b/pkg/guid/guid_pool.go @@ -1,8 +1,8 @@ package guid import ( + "errors" "fmt" - "github.com/rs/zerolog/log" "github.com/Mellanox/ib-kubernetes/pkg/config" @@ -19,8 +19,13 @@ type Pool interface { // ReleaseGUID release the reservation of the guid. // It returns error if the guid is not in the range. ReleaseGUID(string) error + + // Reset clears the current pool and resets it with given values (may be empty) + Reset(guids []string) error } +var GuidPoolExhaustedError = errors.New("GUID pool is exhausted") + type guidPool struct { rangeStart GUID // first guid in range rangeEnd GUID // last guid in range @@ -50,6 +55,34 @@ func NewPool(conf *config.GUIDPoolConfig) (Pool, error) { }, nil } +// Reset clears the current pool and resets it with given values (may be empty) +func (p *guidPool) Reset(guids []string) error { + log.Debug().Msg("resetting guid pool") + + p.guidPoolMap = map[GUID]bool{} + if guids == nil { + return nil + } + + for _, guid := range guids { + guidInRange, err := p.isGuidStringInRange(guid) + if err != nil { + log.Debug().Msgf("error validating GUID: %s: %v", guid, err) + return err + } + if !guidInRange { + // Out of range GUID may be expected and shouldn't be allocated in the pool + continue + } + err = p.AllocateGUID(guid) + if err != nil { + log.Debug().Msgf("error resetting the pool with value: %s: %v", guid, err) + return err + } + } + return nil +} + // GenerateGUID generates a guid from the range func (p *guidPool) GenerateGUID() (GUID, error) { // this look will ensure that we check all the range @@ -62,7 +95,7 @@ func (p *guidPool) GenerateGUID() (GUID, error) { if guid := p.getFreeGUID(p.rangeStart, p.rangeEnd); guid != 0 { return guid, nil } - return 0, fmt.Errorf("guid pool range is full") + return 0, GuidPoolExhaustedError } // ReleaseGUID release allocated guid @@ -88,7 +121,7 @@ func (p *guidPool) AllocateGUID(guid string) error { return err } - if guidAddr < p.rangeStart || guidAddr > p.rangeEnd { + if !p.isGuidInRange(guidAddr) { return fmt.Errorf("out of range guid %s, pool range %v - %v", guid, p.rangeStart, p.rangeEnd) } @@ -104,6 +137,18 @@ func isValidRange(rangeStart, rangeEnd GUID) bool { return rangeStart <= rangeEnd && rangeStart != 0 && rangeEnd != 0xFFFFFFFFFFFFFFFF } +func (p *guidPool) isGuidInRange(guid GUID) bool { + return guid >= p.rangeStart && guid <= p.rangeEnd +} + +func (p *guidPool) isGuidStringInRange(guid string) (bool, error) { + guidAddr, err := ParseGUID(guid) + if err != nil { + return false, err + } + return p.isGuidInRange(guidAddr), nil +} + // getFreeGUID return free guid in given range func (p *guidPool) getFreeGUID(start, end GUID) GUID { for guid := start; guid <= end; guid++ { diff --git a/pkg/guid/guid_test.go b/pkg/guid/guid_test.go index 42110bf..dd59bc8 100644 --- a/pkg/guid/guid_test.go +++ b/pkg/guid/guid_test.go @@ -9,6 +9,57 @@ import ( var _ = Describe("GUID Pool", func() { conf := &config.GUIDPoolConfig{RangeStart: "02:00:00:00:00:00:00:00", RangeEnd: "02:FF:FF:FF:FF:FF:FF:FF"} + Context("ResetPool", func() { + It("Reset pool clears previous values", func() { + pool, err := NewPool(conf) + Expect(err).ToNot(HaveOccurred()) + Expect(pool).ToNot(BeNil()) + + err = pool.AllocateGUID("02:00:00:00:00:00:00:00") + Expect(err).ToNot(HaveOccurred()) + err = pool.AllocateGUID("02:00:00:00:FF:00:00:00") + Expect(err).ToNot(HaveOccurred()) + + pool.Reset(nil) + + err = pool.ReleaseGUID("02:00:00:00:00:00:00:00") + Expect(err).To(HaveOccurred()) + err = pool.ReleaseGUID("02:00:00:00:FF:00:00:00") + Expect(err).To(HaveOccurred()) + }) + It("Reset pool stores new values", func() { + pool, err := NewPool(conf) + Expect(err).ToNot(HaveOccurred()) + Expect(pool).ToNot(BeNil()) + + expectedGuids := []string{"02:00:00:00:00:00:00:3e", "02:00:0F:F0:00:FF:00:09", "02:00:00:00:00:00:00:00"} + + pool.Reset(expectedGuids) + + for _, expectedGuid := range expectedGuids { + err = pool.ReleaseGUID(expectedGuid) + Expect(err).ToNot(HaveOccurred()) + } + }) + It("Exhausted pool throws error and doesn't after reset", func() { + conf := &config.GUIDPoolConfig{RangeStart: "02:00:00:00:00:00:00:00", RangeEnd: "02:00:00:00:00:00:00:00"} + pool, err := NewPool(conf) + Expect(err).ToNot(HaveOccurred()) + Expect(pool).ToNot(BeNil()) + guid, err := pool.GenerateGUID() + Expect(err).ToNot(HaveOccurred()) + err = pool.AllocateGUID(guid.String()) + Expect(err).ToNot(HaveOccurred()) + guid, err = pool.GenerateGUID() + Expect(err).To(Equal(GuidPoolExhaustedError)) + + err = pool.Reset(nil) + Expect(err).ToNot(HaveOccurred()) + + guid, err = pool.GenerateGUID() + Expect(err).ToNot(HaveOccurred()) + }) + }) Context("NewPool", func() { It("Create guid pool with valid parameters", func() { pool, err := NewPool(conf) diff --git a/pkg/sm/plugins/noop/noop.go b/pkg/sm/plugins/noop/noop.go index 407d6a1..bfe5427 100644 --- a/pkg/sm/plugins/noop/noop.go +++ b/pkg/sm/plugins/noop/noop.go @@ -48,6 +48,11 @@ func (p *plugin) RemoveGuidsFromPKey(pkey int, guids []net.HardwareAddr) error { return nil } +func (p *plugin) ListGuidsInUse() ([]string, error) { + log.Info().Msg("noop Plugin ListGuidsInUse()") + return nil, nil +} + // Initialize applies configs to plugin and return a subnet manager client func Initialize() (plugins.SubnetManagerClient, error) { log.Info().Msg("Initializing noop plugin") diff --git a/pkg/sm/plugins/plugin.go b/pkg/sm/plugins/plugin.go index f572eab..bedb508 100644 --- a/pkg/sm/plugins/plugin.go +++ b/pkg/sm/plugins/plugin.go @@ -19,4 +19,7 @@ type SubnetManagerClient interface { // RemoveGuidsFromPKey remove guids for given pkey. // It return error if failed. RemoveGuidsFromPKey(pkey int, guids []net.HardwareAddr) error + + // ListGuidsInUse returns a list of all GUIDS associated with PKeys + ListGuidsInUse() ([]string, error) } diff --git a/pkg/sm/plugins/ufm/ufm.go b/pkg/sm/plugins/ufm/ufm.go index 92c7d22..3b4b5fb 100644 --- a/pkg/sm/plugins/ufm/ufm.go +++ b/pkg/sm/plugins/ufm/ufm.go @@ -1,6 +1,7 @@ package main import ( + "encoding/json" "fmt" "net" "net/http" @@ -133,6 +134,48 @@ func (u *ufmPlugin) RemoveGuidsFromPKey(pKey int, guids []net.HardwareAddr) erro return nil } +// convertToMacAddr adds semicolons each 2 characters to convert to MAC format +// UFM returns GUIDS without any delimiters, so expected format is as follows: +// FF00FF00FF00FF00 +func convertToMacAddr(guid string) string { + for i := 2; i < len(guid); i += 3 { + guid = guid[:i] + ":" + guid[i:] + } + return guid +} + +type Guid struct { + GuidValue string `json:"guid"` +} + +type PKey struct { + Guids []Guid `json:"guids"` +} + +// ListGuidsInUse returns all guids currently in use by pKeys +func (u *ufmPlugin) ListGuidsInUse() ([]string, error) { + response, err := u.client.Get(u.buildURL("/ufmRest/resources/pkeys/?guids_data=true"), http.StatusOK) + if err != nil { + return nil, fmt.Errorf("failed to get the list of guids: %v", err) + } + + var pKeys map[string]PKey + + if err := json.Unmarshal(response, &pKeys); err != nil { + return nil, fmt.Errorf("failed to get the list of guids: %v", err) + } + + var guids []string + + for pkey, _ := range pKeys { + pkeyData := pKeys[pkey] + for _, guidData := range pkeyData.Guids { + guids = append(guids, convertToMacAddr(guidData.GuidValue)) + } + } + return guids, nil +} + func (u *ufmPlugin) buildURL(path string) string { return fmt.Sprintf("%s://%s:%d%s", u.conf.HTTPSchema, u.conf.Address, u.conf.Port, path) } diff --git a/pkg/sm/plugins/ufm/ufm_test.go b/pkg/sm/plugins/ufm/ufm_test.go index 0638789..ce8023f 100644 --- a/pkg/sm/plugins/ufm/ufm_test.go +++ b/pkg/sm/plugins/ufm/ufm_test.go @@ -152,4 +152,43 @@ var _ = Describe("Ufm Subnet Manager Client plugin", func() { Expect(&errMsg).To(Equal(&errMessage)) }) }) + Context("ListGuidsInUse", func() { + It("Remove guid from valid pkey", func() { + testResponse := `{ + "0x7fff": { + "guids": [] + }, + "0x7aff": { + "test": "val" + }, + "0x5": { + "guids": [ + { + "guid": "020000000000003e" + }, + { + "guid": "02000FF000FF0009" + } + ] + }, + "0x6": { + "guids": [ + { + "guid": "0200000000000000" + } + ] + } + }` + + client := &mocks.Client{} + client.On("Get", mock.Anything, mock.Anything).Return([]byte(testResponse), nil) + + plugin := &ufmPlugin{client: client, conf: UFMConfig{}} + guids, err := plugin.ListGuidsInUse() + Expect(err).ToNot(HaveOccurred()) + + expectedGuids := []string{"02:00:00:00:00:00:00:3e", "02:00:0F:F0:00:FF:00:09", "02:00:00:00:00:00:00:00"} + Expect(guids).To(ConsistOf(expectedGuids)) + }) + }) })