Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

320 automated ami upgrades + upgrade locking #327

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/DEVELOPER.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ You can also run `make coverage` to generate a coverage report.

## Running BDD tests

### Dependencies

1. You will need an existing EKS cluster running with the connection details exported into a kube config file.
2. [Keikoproj Minion-Manager](https://github.com/keikoproj/minion-manager) must also be running in the cluster.
3. Instance Manager must be started separately, outside of the BDD test suite.


Export some variables and run `make bdd` to run a functional e2e test.

### Example
Expand Down Expand Up @@ -96,3 +103,5 @@ testing: warning: no tests to run
PASS
ok github.com/keikoproj/instance-manager/test-bdd 1362.336s [no tests to run]
```

Note: If your test cluster uses `InstanceGroups` to run core components, annotating the namespace with `instancemgr.keikoproj.io/config-excluded="true"` can help prevent unexpected disruption.
19 changes: 16 additions & 3 deletions api/v1alpha1/instancegroup_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,9 @@ const (
ReconcileModified ReconcileState = "ReconcileModified"

// End States
ReconcileReady ReconcileState = "Ready"
ReconcileErr ReconcileState = "Error"
ReconcileLocked ReconcileState = "Locked"
preflightsiren marked this conversation as resolved.
Show resolved Hide resolved
ReconcileReady ReconcileState = "Ready"
ReconcileErr ReconcileState = "Error"

// Userdata bootstrap stages
PreBootstrapStage = "PreBootstrap"
Expand Down Expand Up @@ -76,6 +77,8 @@ const (
HostPlacementTenancyType = "host"
DefaultPlacementTenancyType = "default"
DedicatedPlacementTenancyType = "dedicated"

ImageLatestValue = "latest"
)

type ContainerRuntime string
Expand All @@ -87,6 +90,8 @@ const (

DockerRuntime ContainerRuntime = "dockerd"
ContainerDRuntime ContainerRuntime = "containerd"

UpgradeLockedAnnotationKey = "instancemgr.keikoproj.io/lock-upgrades"
preflightsiren marked this conversation as resolved.
Show resolved Hide resolved
)

var (
Expand Down Expand Up @@ -392,6 +397,15 @@ func (ig *InstanceGroup) GetUpgradeStrategy() *AwsUpgradeStrategy {
func (ig *InstanceGroup) SetUpgradeStrategy(strategy AwsUpgradeStrategy) {
ig.Spec.AwsUpgradeStrategy = strategy
}
// Locked reports whether the instance group carries the
// UpgradeLockedAnnotationKey annotation with the value "true"
// (compared case-insensitively).
func (ig *InstanceGroup) Locked() bool {
	// A missing key (or a nil annotation map) yields "", which never matches "true".
	lockValue := ig.GetAnnotations()[UpgradeLockedAnnotationKey]
	return strings.EqualFold(lockValue, "true")
}

func (s *EKSSpec) Validate() error {
var (
Expand Down Expand Up @@ -521,7 +535,6 @@ func (c *EKSConfiguration) Validate() error {
c.SuspendedProcesses = processes
}


if c.BootstrapOptions != nil {
if c.BootstrapOptions.ContainerRuntime != "" && !contains(AllowedContainerRuntimes, c.BootstrapOptions.ContainerRuntime) {
return errors.Errorf("validation failed, 'bootstrapOptions.containerRuntime' must be one of %+v", AllowedContainerRuntimes)
Expand Down
50 changes: 43 additions & 7 deletions api/v1alpha1/instancegroup_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"testing"

"github.com/aws/aws-sdk-go/aws"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type EksUnitTest struct {
Expand Down Expand Up @@ -117,13 +118,13 @@ func TestInstanceGroupSpecValidate(t *testing.T) {
MinSize: 1,
Type: "LaunchTemplate",
EKSConfiguration: &EKSConfiguration{
BootstrapOptions: &BootstrapOptions{ContainerRuntime: "foo"},
EksClusterName: "my-eks-cluster",
NodeSecurityGroups: []string{"sg-123456789"},
Image: "ami-12345",
InstanceType: "m5.large",
KeyPairName: "thisShouldBeOptional",
Subnets: []string{"subnet-1111111", "subnet-222222"},
BootstrapOptions: &BootstrapOptions{ContainerRuntime: "foo"},
EksClusterName: "my-eks-cluster",
NodeSecurityGroups: []string{"sg-123456789"},
Image: "ami-12345",
InstanceType: "m5.large",
KeyPairName: "thisShouldBeOptional",
Subnets: []string{"subnet-1111111", "subnet-222222"},
},
}, nil, nil),
},
Expand Down Expand Up @@ -353,6 +354,41 @@ func TestInstanceGroupSpecValidate(t *testing.T) {
}
}

func TestLockedAnnotation(t *testing.T) {
tests := []struct {
name string
annotation string
expected bool
}{
{
name: "Locked",
annotation: "true",
expected: true,
},
{
name: "Unlocked",
annotation: "false",
expected: false,
},
}

for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
testIg := &InstanceGroup{
ObjectMeta: v1.ObjectMeta{
Annotations: map[string]string{
UpgradeLockedAnnotationKey: test.annotation,
},
},
}
res := testIg.Locked()
if res != test.expected {
t.Errorf("%v: got %v, expected %v", test.name, res, test.expected)
}
})
}
}

func basicFargateSpec() *EKSFargateSpec {
return &EKSFargateSpec{
ClusterName: "",
Expand Down
2 changes: 1 addition & 1 deletion controllers/instancegroup_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ func (r *InstanceGroupReconciler) IsNamespaceAnnotated(namespace, key, value str
}

annotations := unstructuredNamespace.GetAnnotations()
if kubeprovider.HasAnnotation(annotations, key, value) {
if kubeprovider.HasAnnotationWithValue(annotations, key, value) {
return true
}
}
Expand Down
12 changes: 12 additions & 0 deletions controllers/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ type CloudDeployer interface {
GetState() v1alpha.ReconcileState // Gets the current state type of the instance group
SetState(v1alpha.ReconcileState) // Sets the current state of the instance group
IsReady() bool // Returns true if state is Ready
Locked() bool // Returns true if instanceGroup is locked
}

func HandleReconcileRequest(d CloudDeployer) error {
Expand Down Expand Up @@ -54,6 +55,11 @@ func HandleReconcileRequest(d CloudDeployer) error {

// CRUD Nodes Upgrade Strategy
if d.GetState() == v1alpha.ReconcileInitUpgrade {
// Locked
if d.Locked() {
d.SetState(v1alpha.ReconcileLocked)
return nil
}
err = d.UpgradeNodes()
if err != nil {
return err
Expand All @@ -67,12 +73,18 @@ func HandleReconcileRequest(d CloudDeployer) error {

// Bootstrap Nodes
if d.IsReady() {

err = d.BootstrapNodes()
if err != nil {
return err
}

if d.GetState() == v1alpha.ReconcileInitUpgrade {
// Locked
if d.Locked() {
d.SetState(v1alpha.ReconcileLocked)
return nil
}
err = d.UpgradeNodes()
if err != nil {
return err
Expand Down
18 changes: 14 additions & 4 deletions controllers/providers/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/aws/aws-sdk-go/service/eks/eksiface"
"github.com/aws/aws-sdk-go/service/iam/iamiface"
"github.com/aws/aws-sdk-go/service/ssm/ssmiface"
"github.com/pkg/errors"
ctrl "sigs.k8s.io/controller-runtime"
)
Expand All @@ -55,6 +56,7 @@ const (
DescribeLaunchTemplateVersionsTTL time.Duration = 60 * time.Second
DescribeInstanceTypesTTL time.Duration = 24 * time.Hour
DescribeInstanceTypeOfferingTTL time.Duration = 1 * time.Hour
GetParameterTTL time.Duration = 1 * time.Hour

CacheBackgroundPruningInterval time.Duration = 1 * time.Hour
CacheMaxItems int64 = 250
Expand Down Expand Up @@ -117,6 +119,7 @@ type AwsWorker struct {
EksClient eksiface.EKSAPI
IamClient iamiface.IAMAPI
Ec2Client ec2iface.EC2API
SsmClient ssmiface.SSMAPI
Ec2Metadata *ec2metadata.EC2Metadata
Parameters map[string]interface{}
}
Expand Down Expand Up @@ -246,10 +249,9 @@ func GetScalingConfigName(group *autoscaling.Group) string {
}

func GetInstanceTypeNetworkInfo(instanceTypes []*ec2.InstanceTypeInfo, instanceType string) *ec2.NetworkInfo {
for _, instanceTypeInfo := range instanceTypes {
if aws.StringValue(instanceTypeInfo.InstanceType) == instanceType {
return instanceTypeInfo.NetworkInfo
}
i := GetInstanceTypeInfo(instanceTypes, instanceType)
if i != nil {
return i.NetworkInfo
}
return nil
}
Expand All @@ -262,3 +264,11 @@ func GetInstanceTypeInfo(instanceTypes []*ec2.InstanceTypeInfo, instanceType str
}
return nil
}

// GetInstanceTypeArchitectures returns the supported architectures recorded in
// the ProcessorInfo of the entry that GetInstanceTypeInfo finds for
// instanceType. It returns nil when no entry is found or when the entry has no
// ProcessorInfo, instead of dereferencing a nil pointer.
func GetInstanceTypeArchitectures(instanceTypes []*ec2.InstanceTypeInfo, instanceType string) []string {
	info := GetInstanceTypeInfo(instanceTypes, instanceType)
	if info == nil || info.ProcessorInfo == nil {
		return nil
	}
	return aws.StringValueSlice(info.ProcessorInfo.SupportedArchitectures)
}
72 changes: 72 additions & 0 deletions controllers/providers/aws/ssm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package aws

import (
"fmt"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ssm"
"github.com/aws/aws-sdk-go/service/ssm/ssmiface"
"github.com/keikoproj/aws-sdk-go-cache/cache"
"github.com/keikoproj/instance-manager/controllers/common"
)

// architectureMap maps a CPU architecture name (the keys used in EksAmis are
// "x86_64" and "arm64") to an SSM parameter path format string.
type architectureMap map[string]string

// SSM parameter path format strings for EKS-oriented AMI image IDs. Each
// contains a single %s verb, which GetEksLatestAmi fills with the Kubernetes
// version before requesting the parameter.
// NOTE(review): EksOptimisedWindowsFull is not referenced by EksAmis in the
// visible code — confirm whether it is used elsewhere.
const (
EksOptimisedAmiPath = "/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id"
EksOptimisedAmazonLinux2Arm64 = "/aws/service/eks/optimized-ami/%s/amazon-linux-2-arm64/recommended/image_id"
EksOptimisedBottlerocket = "/aws/service/bottlerocket/aws-k8s-%s/x86_64/latest/image_id"
EksOptimisedBottlerocketArm64 = "/aws/service/bottlerocket/aws-k8s-%s/arm64/latest/image_id"
EksOptimisedWindowsCore = "/aws/service/ami-windows-latest/Windows_Server-2019-English-Core-EKS_Optimized-%s/image_id"
EksOptimisedWindowsFull = "/aws/service/ami-windows-latest/Windows_Server-2019-English-Full-EKS_Optimized-%s/image_id"
)

var (
EksAmis = map[string]architectureMap{
"amazonlinux2": architectureMap{
"x86_64": EksOptimisedAmiPath,
"arm64": EksOptimisedAmazonLinux2Arm64,
},
"bottlerocket": architectureMap{
"x86_64": EksOptimisedBottlerocket,
preflightsiren marked this conversation as resolved.
Show resolved Hide resolved
"arm64": EksOptimisedBottlerocketArm64,
},
"windows": architectureMap{
"x86_64": EksOptimisedWindowsCore,
},
}
)

// GetAwsSsmClient builds an SSM client for the given region. The session uses
// a retryer created by NewRetryLogger(maxRetries, collector), has caching
// added via cacheCfg with a GetParameterTTL entry for ssm/GetParameter, and
// logs every completed request at verbosity 1. It panics if the session
// cannot be created.
func GetAwsSsmClient(region string, cacheCfg *cache.Config, maxRetries int, collector *common.MetricsCollector) ssmiface.SSMAPI {
	awsCfg := request.WithRetryer(
		aws.NewConfig().WithRegion(region).WithCredentialsChainVerboseErrors(true),
		NewRetryLogger(maxRetries, collector),
	)

	awsSession, err := session.NewSession(awsCfg)
	if err != nil {
		panic(err)
	}

	cache.AddCaching(awsSession, cacheCfg)
	cacheCfg.SetCacheTTL("ssm", "GetParameter", GetParameterTTL)

	// Log each completed call together with whether it was served from cache.
	logAPICall := func(req *request.Request) {
		log.V(1).Info("AWS API call",
			"cacheHit", cache.IsCacheHit(req.HTTPRequest.Context()),
			"service", req.ClientInfo.ServiceName,
			"operation", req.Operation.Name,
		)
	}
	awsSession.Handlers.Complete.PushFront(logAPICall)

	return ssm.New(awsSession)
}

// GetEksLatestAmi looks up the AMI ID for the given OS family, architecture
// and Kubernetes version by reading the matching SSM parameter.
//
// OSFamily and arch select a path format string from EksAmis, and
// kubernetesVersion is substituted into it. An error is returned when the
// OS family / architecture combination has no entry in EksAmis, when the SSM
// call fails, or when the response carries no parameter.
func (w *AwsWorker) GetEksLatestAmi(OSFamily string, arch string, kubernetesVersion string) (string, error) {
	// Indexing a missing OS family yields a nil architectureMap; the comma-ok
	// lookup on it is safe and reports ok == false.
	pathFormat, ok := EksAmis[OSFamily][arch]
	if !ok {
		// Without this check an empty format string would be passed to Sprintf
		// and a malformed parameter name would be sent to SSM.
		return "", fmt.Errorf("no EKS AMI parameter path known for OS family %q and architecture %q", OSFamily, arch)
	}

	parameterName := fmt.Sprintf(pathFormat, kubernetesVersion)
	input := &ssm.GetParameterInput{
		Name: aws.String(parameterName),
	}

	output, err := w.SsmClient.GetParameter(input)
	if err != nil {
		return "", err
	}
	if output == nil || output.Parameter == nil {
		return "", fmt.Errorf("ssm parameter %q returned no value", parameterName)
	}
	return aws.StringValue(output.Parameter.Value), nil
}
2 changes: 1 addition & 1 deletion controllers/providers/kubernetes/crd.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ func GetResources(kube dynamic.Interface, instanceGroup *v1alpha1.InstanceGroup,

annotations := ru.GetAnnotations()

if HasAnnotation(annotations, OwnershipAnnotationKey, OwnershipAnnotationValue) && HasAnnotation(annotations, ScopeAnnotationKey, status.GetActiveScalingGroupName()) {
if HasAnnotationWithValue(annotations, OwnershipAnnotationKey, OwnershipAnnotationValue) && HasAnnotationWithValue(annotations, ScopeAnnotationKey, status.GetActiveScalingGroupName()) {
if IsPathValue(ru, statusJSONPath, completedStatus) || IsPathValue(ru, statusJSONPath, errorStatus) {
// if resource is not completed and not failed, it must be still active
inactiveResources = append(inactiveResources, ru)
Expand Down
9 changes: 8 additions & 1 deletion controllers/providers/kubernetes/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,14 @@ func AddAnnotation(u *unstructured.Unstructured, key, value string) {
u.SetAnnotations(annotations)
}

func HasAnnotation(annotations map[string]string, key, value string) bool {
// HasAnnotation reports whether key is present in annotations, regardless of
// the value stored under it.
func HasAnnotation(annotations map[string]string, key string) bool {
	_, present := annotations[key]
	return present
}

func HasAnnotationWithValue(annotations map[string]string, key, value string) bool {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed this method as this tests the value of the annotation, not just that it exists - happy for suggestions for method names :)

if val, ok := annotations[key]; ok {
if strings.EqualFold(val, value) {
return true
Expand Down
Loading