diff --git a/playbooks/deploy_capi_mgmt.yml b/playbooks/deploy_capi_mgmt.yml
new file mode 100644
index 00000000..e7a25714
--- /dev/null
+++ b/playbooks/deploy_capi_mgmt.yml
@@ -0,0 +1,14 @@
+#####
+# This playbook turns the target Kubernetes cluster into a CAPI management cluster
+#####
+
+- hosts: azimuth_deploy
+  roles:
+    - role: stackhpc.azimuth_ops.alertmanager_config
+      when: >-
+        alertmanager_config_slack_webhook_url is defined and
+        alertmanager_config_slack_webhook_url
+    - role: stackhpc.azimuth_ops.certmanager
+    - role: stackhpc.azimuth_ops.clusterapi
+  environment:
+    KUBECONFIG: "{{ kubeconfig_path | default('') }}"
diff --git a/playbooks/provision.yml b/playbooks/provision.yml
index b03ed875..60b93e9a 100644
--- a/playbooks/provision.yml
+++ b/playbooks/provision.yml
@@ -3,86 +3,8 @@
 #####
 
 
-# Provision the node using Terraform
-- hosts: terraform_provision
-  roles:
-    - stackhpc.azimuth_ops.infra
-  vars:
-    infra_ansible_groups: [k3s, azimuth_deploy]
-
-
-# Configure the node as a K3S cluster
-- hosts: k3s
-  tasks:
-    - include_role:
-        name: stackhpc.azimuth_ops.community_images
-
-    - block:
-        - include_role:
-            name: stackhpc.azimuth_ops.k3s
-
-        - name: Get installed Kubernetes version
-          command: k3s kubectl version --output json
-          changed_when: false
-          register: k3s_kubectl_version
-
-        - name: Set kubectl version fact
-          set_fact:
-            kubectl_version: "{{ (k3s_kubectl_version.stdout | from_json).serverVersion.gitVersion.split('+') | first }}"
-
-        - include_role:
-            name: stackhpc.azimuth_ops.kubectl
-
-        - include_role:
-            name: stackhpc.azimuth_ops.helm
-
-        - include_role:
-            name: stackhpc.azimuth_ops.kustomize
-
-        - name: Slurp kubeconfig file
-          slurp:
-            src: /etc/rancher/k3s/k3s.yaml
-          register: k3s_kubeconfig
-          become: yes
-
-        - name: Ensure kube config directory exists
-          file:
-            path: "{{ ansible_env.HOME }}/.kube"
-            state: directory
-            mode: u=rwx,g=rx,o=rx
-
-        - name: Write kubeconfig file
-          copy:
-            content: "{{ k3s_kubeconfig.content | b64decode }}"
-            dest: "{{ ansible_env.HOME }}/.kube/config"
-            mode: u=rwx,g=,o=
-
-    # For a single node install, we put the monitoring and ingress controller on the K3S cluster
-    - block:
-        # Must be done before NGINX ingress so that the ServiceMonitor CRD exists
-        - include_role:
-            name: stackhpc.azimuth_ops.kube_prometheus_stack
-
-        - include_role:
-            name: stackhpc.azimuth_ops.ingress_nginx
-      when: install_mode == 'singlenode'
-
-    # Configure the K3S cluster as a Cluster API management cluster when doing a HA installation
-    - block:
-        - include_role:
-            name: stackhpc.azimuth_ops.certmanager
-          vars:
-            certmanager_monitoring_enabled: no
-            certmanager_acmehttp01issuer_enabled: no
-
-        - include_role:
-            name: stackhpc.azimuth_ops.clusterapi
-
-        - include_role:
-            name: stackhpc.azimuth_ops.capi_cluster
-          vars:
-            capi_cluster_kubeconfig_path: "{{ ansible_env.HOME }}/kubeconfig-{{ capi_cluster_release_name }}.yaml"
-      when: install_mode == 'ha'
+# Provision the Kubernetes cluster onto which Azimuth will be deployed
+- import_playbook: stackhpc.azimuth_ops.provision_cluster
 
 
 # Install Azimuth
diff --git a/playbooks/provision_capi_mgmt.yml b/playbooks/provision_capi_mgmt.yml
new file mode 100644
index 00000000..84ef25a2
--- /dev/null
+++ b/playbooks/provision_capi_mgmt.yml
@@ -0,0 +1,21 @@
+#####
+# This playbook uses Terraform and Cluster API to provision a CAPI management cluster
+# The CAPI management cluster can be either single-node or HA
+#####
+
+
+# Provision the Kubernetes cluster that will become the CAPI management cluster
+- import_playbook: stackhpc.azimuth_ops.provision_cluster
+
+
+# Configure the provisioned cluster as a CAPI management cluster
+- import_playbook: stackhpc.azimuth_ops.deploy_capi_mgmt
+  vars:
+    # In HA mode, use the kubeconfig for the HA cluster
+    # In single node mode, use the default kubeconfig file
+    kubeconfig_path: >-
+      {{-
+        "{}/kubeconfig-{}.yaml".format(ansible_env.HOME, capi_cluster_release_name)
+        if install_mode == 'ha'
+        else ""
+      }}
diff --git a/playbooks/provision_cluster.yml b/playbooks/provision_cluster.yml
new file mode 100644
index 00000000..3b41f6a9
--- /dev/null
+++ b/playbooks/provision_cluster.yml
@@ -0,0 +1,86 @@
+#####
+# This playbook uses Terraform and Cluster API to provision the infrastructure onto which Azimuth (or a standalone CAPI management cluster) is deployed
+#####
+
+
+# Provision the node using Terraform
+- hosts: terraform_provision
+  roles:
+    - stackhpc.azimuth_ops.infra
+  vars:
+    infra_ansible_groups: [k3s, azimuth_deploy]
+
+
+# Configure the node as a K3S cluster
+- hosts: k3s
+  tasks:
+    - include_role:
+        name: stackhpc.azimuth_ops.community_images
+
+    - block:
+        - include_role:
+            name: stackhpc.azimuth_ops.k3s
+
+        - name: Get installed Kubernetes version
+          command: k3s kubectl version --output json
+          changed_when: false
+          register: k3s_kubectl_version
+
+        - name: Set kubectl version fact
+          set_fact:
+            kubectl_version: "{{ (k3s_kubectl_version.stdout | from_json).serverVersion.gitVersion.split('+') | first }}"
+
+        - include_role:
+            name: stackhpc.azimuth_ops.kubectl
+
+        - include_role:
+            name: stackhpc.azimuth_ops.helm
+
+        - include_role:
+            name: stackhpc.azimuth_ops.kustomize
+
+        - name: Slurp kubeconfig file
+          slurp:
+            src: /etc/rancher/k3s/k3s.yaml
+          register: k3s_kubeconfig
+          become: yes
+
+        - name: Ensure kube config directory exists
+          file:
+            path: "{{ ansible_env.HOME }}/.kube"
+            state: directory
+            mode: u=rwx,g=rx,o=rx
+
+        - name: Write kubeconfig file
+          copy:
+            content: "{{ k3s_kubeconfig.content | b64decode }}"
+            dest: "{{ ansible_env.HOME }}/.kube/config"
+            mode: u=rwx,g=,o=
+
+    # For a single node install, we put the monitoring and ingress controller on the K3S cluster
+    - block:
+        # Must be done before NGINX ingress so that the ServiceMonitor CRD exists
+        - include_role:
+            name: stackhpc.azimuth_ops.kube_prometheus_stack
+
+        - include_role:
+            name: stackhpc.azimuth_ops.ingress_nginx
+          when: "ingress_controller_enabled | default(true)"
+      when: install_mode == 'singlenode'
+
+    # Configure the K3S cluster as a Cluster API management cluster when doing a HA installation
+    - block:
+        - include_role:
+            name: stackhpc.azimuth_ops.certmanager
+          vars:
+            certmanager_monitoring_enabled: no
+            certmanager_acmehttp01issuer_enabled: no
+
+        - include_role:
+            name: stackhpc.azimuth_ops.clusterapi
+
+        - include_role:
+            name: stackhpc.azimuth_ops.capi_cluster
+          vars:
+            capi_cluster_kubeconfig_path: "{{ ansible_env.HOME }}/kubeconfig-{{ capi_cluster_release_name }}.yaml"
+      when: install_mode == 'ha'
diff --git a/roles/capi_cluster/defaults/main.yml b/roles/capi_cluster/defaults/main.yml
index b5cf5948..c08df417 100644
--- a/roles/capi_cluster/defaults/main.yml
+++ b/roles/capi_cluster/defaults/main.yml
@@ -119,6 +119,8 @@ capi_cluster_control_plane_flavor: >-
 capi_cluster_worker_flavor: >-
   {{ undef(hint = 'capi_cluster_worker_flavor is required') }}
 
+# The number of control plane nodes to use
+capi_cluster_control_plane_count: 3
 # The number of workers to use
 capi_cluster_worker_count: 3
 # max_unavailable and max_surge are set so that a new worker is made available to the cluster before one is deleted
@@ -155,10 +157,16 @@ capi_cluster_control_plane_root_volume_availability_zone: "{{ capi_cluster_root_
 capi_cluster_worker_root_volume_availability_zone: "{{ capi_cluster_root_volume_availability_zone }}"
 
 # Configuration for addons
+# Determines if the ingress controller should be enabled
+capi_cluster_addons_ingress_enabled: "{{ ingress_controller_enabled | default(true) }}"
 # Require the specification of a pre-allocated IP for the ingress load balancer
 #  This IP should have the wildcard domain assigned to it
 capi_cluster_addons_ingress_load_balancer_ip: >-
-  {{ undef(hint = 'capi_cluster_addons_ingress_load_balancer_ip is required') }}
+  {{-
+    undef(hint = 'capi_cluster_addons_ingress_load_balancer_ip is required')
+    if capi_cluster_addons_ingress_enabled
+    else None
+  }}
 
 # Options for LoadBalancer services
 #  https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/openstack-cloud-controller-manager/using-openstack-cloud-controller-manager.md#load-balancer
@@ -261,6 +269,7 @@ capi_cluster_release_defaults:
     ) }}
 
   controlPlane:
+    machineCount: "{{ capi_cluster_control_plane_count }}"
     machineFlavor: "{{ capi_cluster_control_plane_flavor }}"
     omitFailureDomain: "{{ capi_cluster_control_plane_omit_failure_domain }}"
     failureDomains: "{{ capi_cluster_control_plane_failure_domains }}"
@@ -323,14 +332,27 @@ capi_cluster_release_defaults:
     mellanoxNetworkOperator:
       enabled: false
     # The NGINX ingress controller is required
-    ingress:
-      enabled: true
-      nginx:
-        release:
-          values:
-            controller:
-              service:
-                loadBalancerIP: "{{ capi_cluster_addons_ingress_load_balancer_ip }}"
+    ingress: >-
+      {{-
+        { "enabled": capi_cluster_addons_ingress_enabled } |
+          combine(
+            {
+              "nginx": {
+                "release": {
+                  "values": {
+                    "controller": {
+                      "service": {
+                        "loadBalancerIP": capi_cluster_addons_ingress_load_balancer_ip,
+                      },
+                    },
+                  },
+                },
+              },
+            }
+            if capi_cluster_addons_ingress_enabled
+            else {}
+          )
+      }}
     # Configure monitoring and alerting
     monitoring:
       enabled: true
diff --git a/roles/capi_cluster/tasks/main.yml b/roles/capi_cluster/tasks/main.yml
index c102ad4c..8b09377e 100644
--- a/roles/capi_cluster/tasks/main.yml
+++ b/roles/capi_cluster/tasks/main.yml
@@ -30,6 +30,18 @@
     retries: 360
     delay: 10
 
+  - name: Wait for machine deployments to be running
+    command: >-
+      kubectl wait machinedeployments --all
+        --for=jsonpath='{.status.phase}'=Running
+        --namespace {{ capi_cluster_release_namespace }}
+        --timeout=0
+    changed_when: false
+    register: capi_cluster_mds_running
+    until: capi_cluster_mds_running is succeeded
+    retries: 360
+    delay: 10
+
   - name: Wait for addons to deploy
     command: >-
       kubectl wait {{ item }} --all
@@ -40,7 +52,6 @@
     changed_when: false
     register: capi_cluster_addons_complete
     until: capi_cluster_addons_complete is succeeded
-    # Wait up to 60 mins for the addons to deploy
     retries: 360
     delay: 10
     loop:
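
A note on the new machine-deployment wait task: `kubectl wait --timeout=0` performs a single check and exits non-zero if the condition is not yet met, so the actual polling is driven by Ansible's `until`/`retries`/`delay` (360 retries at 10s, i.e. up to 60 minutes, the same budget as the comment removed from the addons task). Jsonpath conditions for `kubectl wait` require kubectl v1.23 or newer. A standalone sketch of the same pattern, using hypothetical resource and register names rather than anything from this diff:

# Illustrative only: hypothetical deployment, namespace and register names,
# showing the poll-via-retries pattern used by the new task above
- name: Wait for example deployment to become ready
  command: >-
    kubectl wait deployments example
      --for=jsonpath='{.status.readyReplicas}'=3
      --namespace example-namespace
      --timeout=0
  changed_when: false
  register: example_deployment_ready
  until: example_deployment_ready is succeeded
  retries: 360
  delay: 10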
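
End-to-end, the new entrypoint can be invoked by its fully-qualified collection name (collection playbooks need ansible-core 2.11+). A minimal usage sketch; the inventory layout and variable values below are hypothetical examples, not part of this change:

# inventory/group_vars/all.yml (hypothetical example values)
install_mode: ha  # or 'singlenode'
# New toggle: when false, capi_cluster_addons_ingress_load_balancer_ip is no longer required
ingress_controller_enabled: false
# New variable: rendered into the Helm values as controlPlane.machineCount
capi_cluster_control_plane_count: 3
capi_cluster_worker_count: 3

# Then run the new playbook:
#   ansible-playbook -i inventory stackhpc.azimuth_ops.provision_capi_mgmt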