diff --git a/aws/adapter.go b/aws/adapter.go
index 19df8977..12c0b1cb 100644
--- a/aws/adapter.go
+++ b/aws/adapter.go
@@ -61,6 +61,7 @@ type Adapter struct {
 	obsoleteInstances          []string
 	stackTerminationProtection bool
 	stackTags                  map[string]string
+	stackLastTargetGroupARNs   map[string][]string
 	controllerID               string
 	sslPolicy                  string
 	ipAddressType              string
@@ -248,6 +249,7 @@ func NewAdapter(clusterID, newControllerID, vpcID string, debug, disableInstrume
 		nlbCrossZone:        DefaultNLBCrossZone,
 		nlbHTTPEnabled:      DefaultNLBHTTPEnabled,
 		customFilter:        DefaultCustomFilter,
+		stackLastTargetGroupARNs: make(map[string][]string),
 		TargetCNI: &TargetCNIconfig{
 			Enabled:       false,
 			TargetGroupCh: make(chan []string, 10),
@@ -626,13 +628,43 @@ func (a *Adapter) SecurityGroupID() string {
 // FindManagedStacks returns all CloudFormation stacks containing the controller management tags
 // that match the current cluster and are ready to be used. The stack status is used to filter.
 func (a *Adapter) FindManagedStacks() ([]*Stack, error) {
-	stacks, err := findManagedStacks(a.cloudformation, a.ClusterID(), a.controllerID)
+	stacks, err := findManagedStacks(a.cloudformation, a.ClusterID(), a.controllerID, a.stackLastTargetGroupARNs)
 	if err != nil {
 		return nil, err
 	}
 	return stacks, nil
 }
 
+// UpdateStackLastTargetGroupARNs saves the target group ARNs of stack under
+// its name, unless an entry for that name is already saved. Stacks that are
+// nil or carry no target group ARNs are ignored. FindManagedStacks hands the
+// saved values to findManagedStacks as the fallback for rolling-back stacks.
+func (a *Adapter) UpdateStackLastTargetGroupARNs(stack *Stack) {
+	if stack == nil || len(stack.TargetGroupARNs) == 0 {
+		return
+	}
+	if a.stackLastTargetGroupARNs == nil {
+		// Adapters built without NewAdapter start with a nil map.
+		a.stackLastTargetGroupARNs = make(map[string][]string)
+	}
+	if _, ok := a.stackLastTargetGroupARNs[stack.Name]; ok {
+		return
+	}
+	// Save a copy so later changes to the stack's slice do not leak in.
+	a.stackLastTargetGroupARNs[stack.Name] = append([]string(nil), stack.TargetGroupARNs...)
+}
+
+// GetStackLastTargetGroupARNs returns the target group ARNs saved for
+// stackName, or nil when none are saved.
+func (a *Adapter) GetStackLastTargetGroupARNs(stackName string) []string {
+	return a.stackLastTargetGroupARNs[stackName]
+}
+
+// CleanLastTargetGroupARNs discards every saved target group ARN entry.
+func (a *Adapter) CleanLastTargetGroupARNs() {
+	a.stackLastTargetGroupARNs = make(map[string][]string)
+}
+
 // UpdateTargetGroupsAndAutoScalingGroups updates Auto Scaling Groups
 // config to have relevant Target Groups and registers/deregisters single
 // instances (that do not belong to ASG) in relevant Target Groups.
diff --git a/aws/cf.go b/aws/cf.go
index c9ed72a7..0b266805 100644
--- a/aws/cf.go
+++ b/aws/cf.go
@@ -8,6 +8,7 @@ import (
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/service/cloudformation"
 	"github.com/aws/aws-sdk-go/service/cloudformation/cloudformationiface"
+	log "github.com/sirupsen/logrus"
 )
 
 const (
@@ -500,13 +501,31 @@ func mapToManagedStack(stack *cloudformation.Stack) *Stack {
 	}
 }
 
-func findManagedStacks(svc cloudformationiface.CloudFormationAPI, clusterID, controllerID string) ([]*Stack, error) {
+// findManagedStacks lists the CloudFormation stacks that isManagedStack
+// accepts for clusterID and controllerID. A stack whose status is
+// cloudformation.StackStatusRollbackInProgress and that reports no target
+// group ARNs gets the ARNs saved for its name in stacksLastTargetGroupARNs;
+// when nothing is saved for it, the stack is left out of the result.
+func findManagedStacks(svc cloudformationiface.CloudFormationAPI, clusterID, controllerID string, stacksLastTargetGroupARNs map[string][]string) ([]*Stack, error) {
 	stacks := make([]*Stack, 0)
 	err := svc.DescribeStacksPages(&cloudformation.DescribeStacksInput{},
 		func(page *cloudformation.DescribeStacksOutput, lastPage bool) bool {
 			for _, s := range page.Stacks {
 				if isManagedStack(s.Tags, clusterID, controllerID) {
-					stacks = append(stacks, mapToManagedStack(s))
+					stack := mapToManagedStack(s)
+					// NOTE(review): only StackStatusRollbackInProgress is handled here; confirm
+					// whether StackStatusUpdateRollbackInProgress needs the same fallback.
+					if len(stack.TargetGroupARNs) == 0 && stack.status == cloudformation.StackStatusRollbackInProgress {
+						lastARNs, ok := stacksLastTargetGroupARNs[stack.Name]
+						if !ok {
+							log.Warnf("stack %s has no saved target groups, skipping", stack.Name)
+							// Really skip the stack, as the message says.
+							continue
+						}
+						log.Warnf("stack %s is in rolling back state, falling back to last saved output", stack.Name)
+						stack.TargetGroupARNs = lastARNs
+					}
+					stacks = append(stacks, stack)
 				}
 			}
 			return true
diff --git a/aws/cf_test.go b/aws/cf_test.go
index b0acad7e..43d743c5 100644
--- a/aws/cf_test.go
+++ b/aws/cf_test.go
@@ -530,7 +530,7 @@ func TestFindManagedStacks(t *testing.T) {
 			wantErr: false,
 		},
 		{
-			name: "successfull-call-with-rollback-status",
+			name: "successfull-call-with-one-rollback-status",
 			given: fake.CFOutputs{
 				DescribeStackPages: fake.R(nil, nil),
 				DescribeStacks: fake.R(&cloudformation.DescribeStacksOutput{
@@ -545,21 +545,36 @@ func TestFindManagedStacks(t *testing.T) {
 							},
 							Outputs: []*cloudformation.Output{},
 						},
+						{
+							StackName:   aws.String("managed-stack"),
+							StackStatus: aws.String(cloudformation.StackStatusCreateComplete),
+							Tags: []*cloudformation.Tag{
+								cfTag(kubernetesCreatorTag, DefaultControllerID),
+								cfTag(clusterIDTagPrefix+"test-cluster", resourceLifecycleOwned),
+								cfTag(certificateARNTagPrefix+"cert-arn", time.Time{}.Format(time.RFC3339)),
+							},
+							Outputs: []*cloudformation.Output{
+								{OutputKey: aws.String(outputLoadBalancerDNSName), OutputValue: aws.String("example.com")},
+								{OutputKey: aws.String(outputTargetGroupARN), OutputValue: aws.String("tg-arn")},
+							},
+						},
 					},
 				}, nil),
 			},
 			want: []*Stack{
 				{
-					Name: "managed-stack-rolling-back",
+					Name:    "managed-stack",
+					DNSName: "example.com",
 					CertificateARNs: map[string]time.Time{
 						"cert-arn": {},
 					},
+					TargetGroupARNs: []string{"tg-arn"},
 					tags: map[string]string{
 						kubernetesCreatorTag:                 DefaultControllerID,
 						clusterIDTagPrefix + "test-cluster":  resourceLifecycleOwned,
 						certificateARNTagPrefix + "cert-arn": time.Time{}.Format(time.RFC3339),
 					},
-					status: cloudformation.StackStatusRollbackInProgress,
+					status: cloudformation.StackStatusCreateComplete,
 					HTTP2:  true,
 				},
 			},
@@ -645,7 +660,7 @@ func TestFindManagedStacks(t *testing.T) {
 	} {
 		t.Run(ti.name, func(t *testing.T) {
 			c := &fake.CFClient{Outputs: ti.given}
-			got, err := findManagedStacks(c, "test-cluster", DefaultControllerID)
+			got, err := findManagedStacks(c, "test-cluster", DefaultControllerID, map[string][]string{})
 			if err != nil {
 				if !ti.wantErr {
 					t.Error("unexpected error", err)
diff --git a/worker.go b/worker.go
index 86e9beea..aa186e45 100644
--- a/worker.go
+++ b/worker.go
@@ -287,10 +287,15 @@ func doWork(
 		return problems.Add("failed to list managed stacks: %w", err)
 	}
 
+	awsAdapter.CleanLastTargetGroupARNs()
+
 	for _, stack := range stacks {
 		if err := stack.Err(); err != nil {
 			problems.Add("stack %s error: %w", stack.Name, err)
 		}
+		if len(stack.TargetGroupARNs) > 0 {
+			awsAdapter.UpdateStackLastTargetGroupARNs(stack)
+		}
 	}
 
 	err = awsAdapter.UpdateAutoScalingGroupsAndInstances()