Skip to content

Commit

Permalink
Fix Port Leaks
Browse files Browse the repository at this point in the history
When an instance fails to come up, there's a bug where the port doesn't
get cleaned up, and eventually you will exhaust your quota. The code
does attempt to garbage collect, but I suspect at some point in the past
ports were suffixed with an index, but the garbage collection code
wasn't made aware of this, so lists nothing. Replace the APi "filtering"
that doesn't work with a better algorithm that uses the instance
creation code to generate the expected ports, then generate the names
and delete if the port exists. Also protected with some nice unit tests.
  • Loading branch information
spjmurray committed Aug 25, 2023
1 parent f938384 commit b77e660
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 27 deletions.
5 changes: 3 additions & 2 deletions controllers/openstackcluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,9 @@ func deleteBastion(scope scope.Scope, cluster *clusterv1.Cluster, openStackClust
}
}

rootVolume := openStackCluster.Spec.Bastion.Instance.RootVolume
if err = computeService.DeleteInstance(openStackCluster, instanceStatus, instanceName, rootVolume); err != nil {
instanceSpec := bastionToInstanceSpec(openStackCluster, cluster.Name)

if err = computeService.DeleteInstance(openStackCluster, openStackCluster, instanceStatus, instanceSpec); err != nil {
handleUpdateOSCError(openStackCluster, fmt.Errorf("failed to delete bastion: %w", err))
return fmt.Errorf("failed to delete bastion: %w", err)
}
Expand Down
4 changes: 3 additions & 1 deletion controllers/openstackmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,9 @@ func (r *OpenStackMachineReconciler) reconcileDelete(scope scope.Scope, cluster
}
}

if err := computeService.DeleteInstance(openStackMachine, instanceStatus, openStackMachine.Name, openStackMachine.Spec.RootVolume); err != nil {
instanceSpec := machineToInstanceSpec(openStackCluster, machine, openStackMachine, "")

if err := computeService.DeleteInstance(openStackCluster, openStackMachine, instanceStatus, instanceSpec); err != nil {
conditions.MarkFalse(openStackMachine, infrav1.InstanceReadyCondition, infrav1.InstanceDeleteFailedReason, clusterv1.ConditionSeverityError, "Deleting instance failed: %v", err)
return ctrl.Result{}, fmt.Errorf("delete instance: %w", err)
}
Expand Down
23 changes: 10 additions & 13 deletions pkg/cloud/services/compute/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ func (s *Service) createInstanceImpl(eventObject runtime.Object, openStackCluste
if len(instanceSpec.Tags) > 0 {
iTags = instanceSpec.Tags
}
portName := getPortName(instanceSpec.Name, portOpts, i)
portName := networking.GetPortName(instanceSpec.Name, portOpts, i)
port, err := networkingService.GetOrCreatePort(eventObject, clusterName, portName, portOpts, securityGroups, iTags)
if err != nil {
return nil, err
Expand Down Expand Up @@ -380,14 +380,6 @@ func (s *Service) createInstanceImpl(eventObject runtime.Object, openStackCluste
return createdInstance, nil
}

// getPortName appends a suffix to an instance name in order to try and get a unique name per port.
func getPortName(instanceName string, opts *infrav1.PortOpts, netIndex int) string {
if opts != nil && opts.NameSuffix != "" {
return fmt.Sprintf("%s-%s", instanceName, opts.NameSuffix)
}
return fmt.Sprintf("%s-%d", instanceName, netIndex)
}

func rootVolumeName(instanceName string) string {
return fmt.Sprintf("%s-root", instanceName)
}
Expand Down Expand Up @@ -550,7 +542,7 @@ func (s *Service) GetManagementPort(openStackCluster *infrav1.OpenStackCluster,
return &allPorts[0], nil
}

func (s *Service) DeleteInstance(eventObject runtime.Object, instanceStatus *InstanceStatus, instanceName string, rootVolume *infrav1.RootVolume) error {
func (s *Service) DeleteInstance(openStackCluster *infrav1.OpenStackCluster, eventObject runtime.Object, instanceStatus *InstanceStatus, instanceSpec *InstanceSpec) error {
if instanceStatus == nil {
/*
We create a boot-from-volume instance in 2 steps:
Expand All @@ -570,8 +562,8 @@ func (s *Service) DeleteInstance(eventObject runtime.Object, instanceStatus *Ins
Note that we don't need to separately delete the root volume when deleting the instance because
DeleteOnTermination will ensure it is deleted in that case.
*/
if hasRootVolume(rootVolume) {
name := rootVolumeName(instanceName)
if hasRootVolume(instanceSpec.RootVolume) {
name := rootVolumeName(instanceSpec.Name)
volume, err := s.getVolumeByName(name)
if err != nil {
return err
Expand Down Expand Up @@ -620,7 +612,12 @@ func (s *Service) DeleteInstance(eventObject runtime.Object, instanceStatus *Ins

// delete port of error instance
if instanceStatus.State() == infrav1.InstanceStateError {
if err := networkingService.GarbageCollectErrorInstancesPort(eventObject, instanceStatus.Name()); err != nil {
portOpts, err := s.constructPorts(openStackCluster, instanceSpec)
if err != nil {
return err
}

if err := networkingService.GarbageCollectErrorInstancesPort(eventObject, instanceSpec.Name, portOpts); err != nil {
return err
}
}
Expand Down
11 changes: 9 additions & 2 deletions pkg/cloud/services/compute/instance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ import (
infrav1 "sigs.k8s.io/cluster-api-provider-openstack/api/v1alpha7"
"sigs.k8s.io/cluster-api-provider-openstack/pkg/clients"
"sigs.k8s.io/cluster-api-provider-openstack/pkg/clients/mock"
"sigs.k8s.io/cluster-api-provider-openstack/pkg/cloud/services/networking"
"sigs.k8s.io/cluster-api-provider-openstack/pkg/scope"
)

Expand Down Expand Up @@ -110,7 +111,7 @@ func Test_getPortName(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := getPortName(tt.args.instanceName, tt.args.opts, tt.args.netIndex); got != tt.want {
if got := networking.GetPortName(tt.args.instanceName, tt.args.opts, tt.args.netIndex); got != tt.want {
t.Errorf("getPortName() = %v, want %v", got, tt.want)
}
})
Expand Down Expand Up @@ -819,7 +820,13 @@ func TestService_DeleteInstance(t *testing.T) {
if err != nil {
t.Fatalf("Failed to create service: %v", err)
}
if err := s.DeleteInstance(tt.eventObject, tt.instanceStatus(), openStackMachineName, tt.rootVolume); (err != nil) != tt.wantErr {

instanceSpec := &InstanceSpec{
Name: openStackMachineName,
RootVolume: tt.rootVolume,
}

if err := s.DeleteInstance(&infrav1.OpenStackCluster{}, tt.eventObject, tt.instanceStatus(), instanceSpec); (err != nil) != tt.wantErr {
t.Errorf("Service.DeleteInstance() error = %v, wantErr %v", err, tt.wantErr)
}
})
Expand Down
38 changes: 29 additions & 9 deletions pkg/cloud/services/networking/port.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,18 +289,38 @@ func (s *Service) DeletePorts(openStackCluster *infrav1.OpenStackCluster) error
return nil
}

func (s *Service) GarbageCollectErrorInstancesPort(eventObject runtime.Object, instanceName string) error {
portList, err := s.client.ListPort(ports.ListOpts{
Name: instanceName,
})
if err != nil {
return err
}
for _, p := range portList {
if err := s.DeletePort(eventObject, p.ID); err != nil {
func (s *Service) GarbageCollectErrorInstancesPort(eventObject runtime.Object, instanceName string, portOpts []infrav1.PortOpts) error {
for i := range portOpts {
portOpt := &portOpts[i]

portName := GetPortName(instanceName, portOpt, i)

// TODO: whould be nice if gophercloud could be persuaded to accept multiple
// names as is allowed by the API in order to reduce API traffic.
portList, err := s.client.ListPort(ports.ListOpts{Name: portName})
if err != nil {
return err
}

// NOTE: https://github.com/kubernetes-sigs/cluster-api-provider-openstack/issues/1476
// It is up to the end user to specify a UNIQUE cluster name when provisioning in the
// same project, otherwise things will alias and we could delete more than we should.
if len(portList) > 1 {
return fmt.Errorf("garbage collection of port %s failed, found %d ports with the same name", portName, len(portList))
}

if err := s.DeletePort(eventObject, portList[0].ID); err != nil {
return err
}
}

return nil
}

// GetPortName appends a suffix to an instance name in order to try and get a unique name per port.
func GetPortName(instanceName string, opts *infrav1.PortOpts, netIndex int) string {
if opts != nil && opts.NameSuffix != "" {
return fmt.Sprintf("%s-%s", instanceName, opts.NameSuffix)
}
return fmt.Sprintf("%s-%d", instanceName, netIndex)
}
79 changes: 79 additions & 0 deletions pkg/cloud/services/networking/port_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,85 @@ func Test_GetOrCreatePort(t *testing.T) {
}
}

func Test_GarbageCollectErrorInstancesPort(t *testing.T) {
mockCtrl := gomock.NewController(t)
defer mockCtrl.Finish()

instanceName := "foo"
portID1 := "dc6e0ae3-dad6-4240-a9cb-e541916f20d3"
portID2 := "a38ab1cb-c2cc-4c1b-9d1d-696ec73356d2"
portName1 := GetPortName(instanceName, nil, 0)
portName2 := GetPortName(instanceName, nil, 1)

tests := []struct {
// man is the name of the test.
name string
// expect allows definition of any expected calls to the mock.
expect func(m *mock.MockNetworkClientMockRecorder)
// portOpts defines the instance ports as defined in the OSM spec.
portOpts []infrav1.PortOpts
// wantErr defines whether the test is supposed to fail.
wantErr bool
}{
{
name: "garbage collects all ports for an instance",
expect: func(m *mock.MockNetworkClientMockRecorder) {
o1 := ports.ListOpts{
Name: portName1,
}
p1 := []ports.Port{
{
ID: portID1,
Name: portName1,
},
}
m.ListPort(o1).Return(p1, nil)
m.DeletePort(portID1)
o2 := ports.ListOpts{
Name: portName2,
}
p2 := []ports.Port{
{
ID: portID2,
Name: portName2,
},
}

m.ListPort(o2).Return(p2, nil)
m.DeletePort(portID2)
},
portOpts: []infrav1.PortOpts{
{},
{},
},
wantErr: false,
},
}

eventObject := &infrav1.OpenStackMachine{}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)
mockClient := mock.NewMockNetworkClient(mockCtrl)
tt.expect(mockClient.EXPECT())
s := Service{
client: mockClient,
}
err := s.GarbageCollectErrorInstancesPort(
eventObject,
instanceName,
tt.portOpts,
)
if tt.wantErr {
g.Expect(err).To(HaveOccurred())
} else {
g.Expect(err).NotTo(HaveOccurred())
}
})
}
}

func pointerTo(b bool) *bool {
return &b
}

0 comments on commit b77e660

Please sign in to comment.