diff --git a/charts/gsp-cluster/charts/cluster-autoscaler-5.1.0.tgz b/charts/gsp-cluster/charts/cluster-autoscaler-5.1.0.tgz
new file mode 100644
index 000000000..4d5a14c43
Binary files /dev/null and b/charts/gsp-cluster/charts/cluster-autoscaler-5.1.0.tgz differ
diff --git a/charts/gsp-cluster/requirements.lock b/charts/gsp-cluster/requirements.lock
index 6793c8962..46ddbfcfb 100644
--- a/charts/gsp-cluster/requirements.lock
+++ b/charts/gsp-cluster/requirements.lock
@@ -1,15 +1,18 @@
 dependencies:
-- name: fluentd-cloudwatch
-  repository: https://kubernetes-charts-incubator.storage.googleapis.com/
-  version: 0.6.4
+- name: cluster-autoscaler
+  repository: https://kubernetes-charts.storage.googleapis.com/
+  version: 5.1.0
 - name: concourse
   repository: https://kubernetes-charts.storage.googleapis.com/
   version: 8.2.5
-- name: kiam
-  repository: https://kubernetes-charts.storage.googleapis.com/
-  version: 2.5.1
+- name: fluentd-cloudwatch
+  repository: https://kubernetes-charts-incubator.storage.googleapis.com/
+  version: 0.6.4
 - name: harbor
   repository: https://helm.goharbor.io/
   version: 1.1.3
-digest: sha256:2a0c1a7ffb5abc8ef64db2ccae629ff037e75e250cd1dc40ad41836a72d03476
-generated: "2019-09-23T10:02:21.19228+01:00"
+- name: kiam
+  repository: https://kubernetes-charts.storage.googleapis.com/
+  version: 2.5.1
+digest: sha256:151b4ed228617b41d7793f2edd1eb92039d9b243f87ee4d2c893406ea3a67e6f
+generated: "2019-10-02T10:55:58.447495+01:00"
diff --git a/charts/gsp-cluster/requirements.yaml b/charts/gsp-cluster/requirements.yaml
index 471d19199..d90a15f7b 100644
--- a/charts/gsp-cluster/requirements.yaml
+++ b/charts/gsp-cluster/requirements.yaml
@@ -1,15 +1,19 @@
 dependencies:
-  - name: "fluentd-cloudwatch"
-    version: 0.6.4
-    repository: https://kubernetes-charts-incubator.storage.googleapis.com/
+  - name: "cluster-autoscaler"
+    version: 5.1.0
+    repository: https://kubernetes-charts.storage.googleapis.com/
     condition: global.runningOnAws
   - name: "concourse"
     version: 8.2.5
     repository: https://kubernetes-charts.storage.googleapis.com/
-  - name: "kiam"
-    version: 2.5.1
-    repository: https://kubernetes-charts.storage.googleapis.com/
+  - name: "fluentd-cloudwatch"
+    version: 0.6.4
+    repository: https://kubernetes-charts-incubator.storage.googleapis.com/
     condition: global.runningOnAws
   - name: "harbor"
     version: 1.1.3
     repository: https://helm.goharbor.io/
+  - name: "kiam"
+    version: 2.5.1
+    repository: https://kubernetes-charts.storage.googleapis.com/
+    condition: global.runningOnAws
diff --git a/charts/gsp-cluster/values.yaml b/charts/gsp-cluster/values.yaml
index 5e218f0dc..046ab92d7 100644
--- a/charts/gsp-cluster/values.yaml
+++ b/charts/gsp-cluster/values.yaml
@@ -60,6 +60,16 @@ httpEgressSafelist: []
 # requiredApprovalCount: 2
 # scope: cluster
 
+# Overrides for the cluster-autoscaler subchart (gated on global.runningOnAws in requirements.yaml).
+cluster-autoscaler:
+  extraArgs:
+    balance-similar-node-groups: true
+  image:
+    tag: v1.14.5 # upgrade this when upgrading kubernetes
+  priorityClassName: system-cluster-critical
+  serviceMonitor:
+    enabled: true
+
 kiam:
   nameOverride:
   fullnameOverride:
diff --git a/modules/gsp-cluster/cluster-autoscaler.tf b/modules/gsp-cluster/cluster-autoscaler.tf
new file mode 100644
index 000000000..e12469d6a
--- /dev/null
+++ b/modules/gsp-cluster/cluster-autoscaler.tf
@@ -0,0 +1,50 @@
+# IAM role named in the cluster-autoscaler pod annotation (iam.amazonaws.com/role).
+# NOTE(review): the role name is not prefixed with the cluster name -- confirm
+# that only one cluster per AWS account creates this role.
+resource "aws_iam_role" "cluster_autoscaler" {
+  name = "cluster-autoscaler"
+
+  assume_role_policy = "${data.aws_iam_policy_document.trust_kiam_server.json}"
+}
+
+# Permissions for the autoscaler: read-only Describe* calls on any resource, and
+# scaling calls restricted to auto scaling groups tagged for this cluster.
+data "aws_iam_policy_document" "cluster_autoscaler_policy" {
+  statement {
+    effect = "Allow"
+
+    actions = [
+      "autoscaling:DescribeAutoScalingGroups",
+      "autoscaling:DescribeAutoScalingInstances",
+      "autoscaling:DescribeLaunchConfigurations",
+      "autoscaling:DescribeTags",
+    ]
+
+    resources = ["*"]
+  }
+
+  statement {
+    effect = "Allow"
+
+    actions = [
+      "autoscaling:SetDesiredCapacity",
+      "autoscaling:TerminateInstanceInAutoScalingGroup",
+    ]
+
+    # "Null" = "false" means the tag key must be present on the target group.
+    condition = {
+      test     = "Null"
+      variable = "autoscaling:ResourceTag/k8s.io/cluster-autoscaler/${var.cluster_name}"
+      values   = ["false"]
+    }
+
+    resources = ["*"]
+  }
+}
+
+# Attach the policy document above to the role; without this the role grants nothing.
+resource "aws_iam_role_policy" "cluster_autoscaler" {
+  name   = "cluster-autoscaler"
+  role   = "${aws_iam_role.cluster_autoscaler.id}"
+  policy = "${data.aws_iam_policy_document.cluster_autoscaler_policy.json}"
+}
diff --git a/modules/gsp-cluster/data/values.yaml b/modules/gsp-cluster/data/values.yaml
index 0e965ce45..96d94eb86 100644
--- a/modules/gsp-cluster/data/values.yaml
+++ b/modules/gsp-cluster/data/values.yaml
@@ -40,6 +40,15 @@ notary:
   delegationPassphrase: ${notary_delegation_passphrase}
   delegationKey: ${notary_delegation_key}
 
+cluster-autoscaler:
+  cloudProvider: aws
+  # NOTE(review): region is hard-coded -- confirm every cluster runs in eu-west-2.
+  awsRegion: eu-west-2
+  autoDiscovery:
+    clusterName: ${cluster_name}
+  podAnnotations:
+    iam.amazonaws.com/role: ${cluster_autoscaler_role_name}
+
 concourseMainTeamGithubTeams: ${concourse_main_team_github_teams}
 concourse:
   secrets:
diff --git a/modules/gsp-cluster/values.tf b/modules/gsp-cluster/values.tf
index ba9959d61..98e3afc16 100644
--- a/modules/gsp-cluster/values.tf
+++ b/modules/gsp-cluster/values.tf
@@ -17,6 +17,7 @@ data "template_file" "values" {
     sre_role_arns = "${jsonencode(var.sre_role_arns)}"
     sre_user_arns = "${jsonencode(var.sre_user_arns)}"
     bootstrap_role_arns = "${jsonencode(module.k8s-cluster.bootstrap_role_arns)}"
+    cluster_autoscaler_role_name = "${aws_iam_role.cluster_autoscaler.name}"
     concourse_admin_password = "${random_string.concourse_password.result}"
     concourse_teams = "${jsonencode(concat(list("main"), var.concourse_teams))}"
     concourse_main_team_github_teams = "${jsonencode(var.concourse_main_team_github_teams)}"
@@ -57,6 +58,7 @@ data "template_file" "values" {
 
     permitted_roles_regex = "^(${join("|", list(
       aws_iam_role.cloudwatch_log_shipping_role.name,
+      aws_iam_role.cluster_autoscaler.name,
       aws_iam_role.concourse.name,
       aws_iam_role.grafana.name,
       aws_iam_role.gsp-service-operator.name,
diff --git a/modules/k8s-cluster/data/nodegroup-v2.yaml b/modules/k8s-cluster/data/nodegroup-v2.yaml
index 9bce999ed..9f36834ed 100644
--- a/modules/k8s-cluster/data/nodegroup-v2.yaml
+++ b/modules/k8s-cluster/data/nodegroup-v2.yaml
@@ -202,6 +202,13 @@ Resources:
         - Key: !Sub kubernetes.io/cluster/${ClusterName}
           Value: owned
           PropagateAtLaunch: true
+        # cluster-autoscaler tags; values are quoted so they stay strings, not YAML booleans.
+        - Key: k8s.io/cluster-autoscaler/enabled
+          Value: 'true'
+          PropagateAtLaunch: true
+        - Key: !Sub k8s.io/cluster-autoscaler/${ClusterName}
+          Value: 'true'
+          PropagateAtLaunch: true
     UpdatePolicy:
       AutoScalingRollingUpdate:
         MaxBatchSize: 2