diff --git a/defaults/main.yml b/defaults/main.yml index 594239aa..fbfb64e5 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -13,11 +13,9 @@ ha_cluster_start_on_boot: true ha_cluster_extra_packages: [] -ha_cluster_fence_agent_packages: "{{ - ['fence-agents-all'] - + - (['fence-virt'] if ansible_architecture == 'x86_64' else []) - }}" +# Default fence agent packages are defined in respective os_family var files +ha_cluster_fence_agent_packages: + "{{ __ha_cluster_fence_agent_packages_default }}" ha_cluster_hacluster_password: "" ha_cluster_regenerate_keys: false diff --git a/tasks/distribute-fence-virt-key.yml b/tasks/distribute-fence-virt-key.yml index a4a9d62d..2ff89928 100644 --- a/tasks/distribute-fence-virt-key.yml +++ b/tasks/distribute-fence-virt-key.yml @@ -4,7 +4,7 @@ file: path: /etc/cluster state: directory - mode: 0755 + mode: '0755' - name: Get fence_xvm.key include_tasks: presharedkey.yml @@ -20,4 +20,4 @@ dest: /etc/cluster/fence_xvm.key owner: root group: root - mode: 0600 + mode: '0600' diff --git a/tasks/enable-repositories/Suse.yml b/tasks/enable-repositories/Suse.yml new file mode 100644 index 00000000..c07e39f8 --- /dev/null +++ b/tasks/enable-repositories/Suse.yml @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: MIT +--- +# All required repositories are already part of SLES for SAP 15 SP5+. diff --git a/tasks/main.yml b/tasks/main.yml index 99e05dd8..3e20c8a3 100644 --- a/tasks/main.yml +++ b/tasks/main.yml @@ -32,8 +32,8 @@ when: - ha_cluster_hacluster_password | string | length > 0 -- name: Configure pcs / pcsd - include_tasks: shell_{{ ha_cluster_pacemaker_shell }}/pcs-configure-pcs-pcsd.yml # yamllint disable-line rule:line-length +- name: Configure shell + include_tasks: shell_{{ ha_cluster_pacemaker_shell }}/configure-shell.yml # yamllint disable-line rule:line-length - name: Configure firewall and selinux when: ha_cluster_cluster_present | bool or ha_cluster_qnetd.present | d(false) @@ -57,8 +57,7 @@ + ha_cluster_sbd_enabled | ternary(__ha_cluster_sbd_packages, []) + - ha_cluster_fence_agent_packages - }}" + ha_cluster_fence_agent_packages }}" state: present use: "{{ (__ha_cluster_is_ostree | d(false)) | ternary('ansible.posix.rhel_rpm_ostree', omit) }}" @@ -74,10 +73,10 @@ - name: Configure corosync include_tasks: shell_{{ ha_cluster_pacemaker_shell }}/cluster-setup-corosync.yml # yamllint disable-line rule:line-length - - name: Pcs auth + - name: Cluster auth # Auth is run after corosync.conf has been distributed so that pcs # distributes pcs tokens in the cluster automatically. - include_tasks: shell_{{ ha_cluster_pacemaker_shell }}/pcs-auth.yml + include_tasks: shell_{{ ha_cluster_pacemaker_shell }}/cluster-auth.yml - name: Distribute cluster shared keys # This is run after pcs auth, so that the nodes are authenticated against @@ -93,6 +92,9 @@ - name: Create and push CIB include_tasks: shell_{{ ha_cluster_pacemaker_shell }}/create-and-push-cib.yml # yamllint disable-line rule:line-length + # CIB changes should be done only on one of cluster nodes to avoid + # corruption and inconsistency of resulting cibadmin patch file. 
+ run_once: true - name: Remove cluster configuration when: not ha_cluster_cluster_present diff --git a/tasks/shell_crmsh/.gitkeep b/tasks/shell_crmsh/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/tasks/shell_crmsh/check-and-prepare-role-variables.yml b/tasks/shell_crmsh/check-and-prepare-role-variables.yml new file mode 100644 index 00000000..08b2a75f --- /dev/null +++ b/tasks/shell_crmsh/check-and-prepare-role-variables.yml @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: MIT +--- +- name: Check cluster configuration variables + block: + - name: Fail if passwords are not specified + ansible.builtin.fail: + msg: "{{ item }} must be specified" + when: + - lookup("vars", item, default="") | string | length < 1 + - ha_cluster_cluster_present | bool + loop: + - ha_cluster_hacluster_password + run_once: true + + - name: Fail if nodes do not have the same number of SBD devices specified + ansible.builtin.fail: + msg: All nodes must have the same number of SBD devices specified + when: + - ha_cluster_cluster_present | bool + - ha_cluster_sbd_enabled | bool + - > + ansible_play_hosts + | map('extract', hostvars, ['ha_cluster', 'sbd_devices']) + | map('default', [], true) + | map('length') | unique | length > 1 + run_once: true + + # Running a qnetd on a cluster node does't make sense, fencing would make + # the qnetd unavailable, even if temporarily. + - name: Fail if configuring qnetd on a cluster node + ansible.builtin.fail: + msg: > + Qnetd cannot be configured on a cluster node - + 'ha_cluster_cluster_present' and 'ha_cluster_qnetd.present' cannot + be both set to true + when: + - ha_cluster_cluster_present | bool + - ha_cluster_qnetd.present | d(false) + + - name: Fail if no valid level is specified for a fencing level + ansible.builtin.fail: + msg: Specify 'level' 1..9 for each fencing level + when: + - not((item.level | d() | int) > 0 and (item.level | d() | int) < 10) + loop: "{{ ha_cluster_stonith_levels }}" + run_once: true + + - name: Fail if no target is specified for a fencing level + ansible.builtin.fail: + msg: > + Specify exactly one of 'target', 'target_pattern', 'target_attribute' + for each fencing level + when: + - > + [item.target is defined, + item.target_pattern is defined, + item.target_attribute is defined] + | select | list | length != 1 + loop: "{{ ha_cluster_stonith_levels }}" + run_once: true + +- name: Collect service information + ansible.builtin.service_facts: + +- name: Assert that required services are available + ansible.builtin.assert: + that: "'{{ item }}' in ansible_facts.services" + fail_msg: >- + The service '{{ item }}' was not found on this system. Ensure that this + service is available before running this role. + success_msg: >- + The service '{{ item }}' was discovered on this system. + loop: + - 'logd.service' + +- name: Discover cluster node names + ansible.builtin.set_fact: + __ha_cluster_node_name: "{{ ha_cluster.node_name | d(inventory_hostname) }}" + +- name: Collect cluster node names + ansible.builtin.set_fact: + __ha_cluster_all_node_names: "{{ + ansible_play_hosts + | map('extract', hostvars, '__ha_cluster_node_name') + | list + }}" + +- name: Extract qdevice settings + ansible.builtin.set_fact: + __ha_cluster_qdevice_in_use: "{{ 'device' in ha_cluster_quorum }}" + __ha_cluster_qdevice_model: "{{ ha_cluster_quorum.device.model | d('') }}" + # This may set empty value, if it is not defined. Such value is not valid. + # It will be caught by crm validation before we try using it in the role. 
+ __ha_cluster_qdevice_host: "{{ + ha_cluster_quorum.device.model_options | d([]) + | selectattr('name', 'match', '^host$') + | map(attribute='value') | list | last | d('') + }}" + __ha_cluster_qdevice_crm_address: "{{ + ha_cluster_quorum.device.model_options | d([]) + | selectattr('name', 'match', '^crm-address$') + | map(attribute='value') | list | last | d('') + }}" + +- name: Figure out if ATB needs to be enabled for SBD + ansible.builtin.set_fact: + # SBD needs ATB enabled if all of these are true: + # - sbd does not use devices (In check-and-prepare-role-variables.yml it + # is verified that all nodes have the same number of devices defined. + # Therefore it is enough to check devices of any single node.) + # - number of nodes is even + # - qdevice is not used + __ha_cluster_sbd_needs_atb: "{{ + ha_cluster_sbd_enabled + and not ha_cluster.sbd_devices | d([]) + and __ha_cluster_all_node_names | length is even + and not __ha_cluster_qdevice_in_use + }}" + +- name: Fail if SBD needs ATB enabled and the user configured ATB to be disabled + ansible.builtin.fail: + msg: Cannot set auto_tie_breaker to disabled when SBD needs it to be enabled + when: + - __ha_cluster_sbd_needs_atb | bool + - ha_cluster_quorum.options | d([]) + | selectattr('name', 'match', '^auto_tie_breaker$') + | map(attribute='value') | select('in', ['0', 0]) | list | length > 0 diff --git a/tasks/shell_crmsh/cluster-auth.yml b/tasks/shell_crmsh/cluster-auth.yml new file mode 100644 index 00000000..76e08016 --- /dev/null +++ b/tasks/shell_crmsh/cluster-auth.yml @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: MIT +--- +# Placeholder for potential auth tasks for crmsh +# There are no authentication steps for crmsh currently. diff --git a/tasks/shell_crmsh/cluster-destroy-crm.yml b/tasks/shell_crmsh/cluster-destroy-crm.yml new file mode 100644 index 00000000..ea3e60d1 --- /dev/null +++ b/tasks/shell_crmsh/cluster-destroy-crm.yml @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: MIT +--- +- name: Get stat of cluster configuration files + ansible.builtin.stat: + path: "{{ item }}" + loop: + - /etc/corosync/corosync.conf + - /var/lib/pacemaker/cib/cib.xml + register: __ha_cluster_config_files_stat + +- name: Stop cluster + ansible.builtin.command: + cmd: crm cluster stop --all + when: not __ha_cluster_config_files_stat.results | + selectattr('stat.exists', 'equalto', false) | list | length > 0 + changed_when: true + +- name: Stop cluster daemons + ansible.builtin.service: + name: "{{ item }}" + state: stopped # noqa no-handler + loop: + - pacemaker + - corosync + - corosync-qdevice + +- name: Backup configuration files by renaming to _backup + ansible.builtin.copy: + src: "{{ config_file.item }}" + dest: "/root/{{ config_file.stat.path | basename }}_backup" + owner: root + group: root + mode: '0600' + remote_src: true + backup: true + loop: "{{ __ha_cluster_config_files_stat.results }}" + loop_control: + loop_var: config_file + when: config_file.stat.exists + +- name: Remove cluster configuration files + ansible.builtin.file: + path: "{{ config_file.item }}" + state: absent + loop: "{{ __ha_cluster_config_files_stat.results }}" + loop_control: + loop_var: config_file + when: config_file.stat.exists + +- name: Find all files in /var/lib/pacemaker/cib/ + ansible.builtin.find: + paths: /var/lib/pacemaker/cib + recurse: true + patterns: + - 'cib*' + - 'shadow*' + register: __ha_cluster_cib_files + +- name: Remove all files in /var/lib/pacemaker/cib/ + ansible.builtin.file: + path: "{{ item.path }}" + state: absent + loop: "{{ 
__ha_cluster_cib_files.files }}" diff --git a/tasks/shell_crmsh/cluster-setup-corosync.yml b/tasks/shell_crmsh/cluster-setup-corosync.yml new file mode 100644 index 00000000..44991303 --- /dev/null +++ b/tasks/shell_crmsh/cluster-setup-corosync.yml @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: MIT +--- +- name: Create a corosync.conf tempfile + ansible.builtin.tempfile: + state: file + suffix: _ha_cluster_corosync_conf + register: __ha_cluster_tempfile_corosync_conf + run_once: true # noqa: run_once[task] + # We always need to create corosync.conf file to see whether it's the same as + # what is already present on the cluster nodes. However, we don't want to + # report it as a change since the only thing which matters is copying the + # resulting corosync.conf to cluster nodes. + check_mode: false + changed_when: not ansible_check_mode + +- name: Generate corosync.conf using template + ansible.builtin.template: + src: crmsh_corosync.j2 + dest: "{{ __ha_cluster_tempfile_corosync_conf.path }}" + owner: root + group: root + mode: '0644' + run_once: true # noqa: run_once[task] + +- name: Fetch created corosync.conf file + ansible.builtin.slurp: + src: "{{ __ha_cluster_tempfile_corosync_conf.path }}" + register: __ha_cluster_data_corosync_conf + run_once: true # noqa: run_once[task] + when: __ha_cluster_tempfile_corosync_conf.path is defined + +- name: Distribute corosync.conf file + ansible.builtin.copy: + content: "{{ __ha_cluster_data_corosync_conf['content'] | b64decode }}" + dest: /etc/corosync/corosync.conf + owner: root + group: root + mode: '0644' + register: __ha_cluster_distribute_corosync_conf + when: __ha_cluster_data_corosync_conf is defined + +- name: Remove a corosync.conf tempfile + ansible.builtin.file: + path: "{{ __ha_cluster_tempfile_corosync_conf.path }}" + state: absent + when: __ha_cluster_tempfile_corosync_conf.path is defined + run_once: true # noqa: run_once[task] + # We always need to create corosync.conf file to see whether it's the same as + # what is already present on the cluster nodes. However, we don't want to + # report it as a change since the only thing which matters is copying the + # resulting corosync.conf to cluster nodes. 
+ check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/cluster-setup-keys.yml b/tasks/shell_crmsh/cluster-setup-keys.yml new file mode 100644 index 00000000..39b591f6 --- /dev/null +++ b/tasks/shell_crmsh/cluster-setup-keys.yml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: MIT +--- +- name: Get corosync authkey + ansible.builtin.include_tasks: ../presharedkey.yml + vars: + preshared_key_label: corosync authkey + preshared_key_src: "{{ ha_cluster_corosync_key_src }}" + preshared_key_dest: /etc/corosync/authkey + preshared_key_length: 256 + +- name: Distribute corosync authkey + ansible.builtin.copy: + content: "{{ __ha_cluster_some_preshared_key | b64decode }}" + dest: /etc/corosync/authkey + owner: root + group: root + mode: '0400' + register: __ha_cluster_distribute_corosync_authkey + no_log: true + +- name: Get pacemaker authkey + ansible.builtin.include_tasks: ../presharedkey.yml + vars: + preshared_key_label: pacemaker authkey + preshared_key_src: "{{ ha_cluster_pacemaker_key_src }}" + preshared_key_dest: /etc/pacemaker/authkey + preshared_key_length: 256 + +- name: Distribute pacemaker authkey + ansible.builtin.copy: + content: "{{ __ha_cluster_some_preshared_key | b64decode }}" + dest: /etc/pacemaker/authkey + owner: hacluster + group: haclient + mode: '0400' + register: __ha_cluster_distribute_pacemaker_authkey + no_log: true diff --git a/tasks/shell_crmsh/cluster-start-and-reload.yml b/tasks/shell_crmsh/cluster-start-and-reload.yml new file mode 100644 index 00000000..44279b40 --- /dev/null +++ b/tasks/shell_crmsh/cluster-start-and-reload.yml @@ -0,0 +1,109 @@ +# SPDX-License-Identifier: MIT +--- +- name: Get services status - detect corosync-qdevice + ansible.builtin.service_facts: + +- name: Stop cluster daemons to reload configuration + ansible.builtin.service: + name: "{{ item }}" + state: stopped # noqa no-handler + loop: + - pacemaker + - corosync + - corosync-qdevice + when: + - > + __ha_cluster_distribute_corosync_conf.changed + or __ha_cluster_distribute_corosync_authkey.changed + or __ha_cluster_distribute_pacemaker_authkey.changed + or (__ha_cluster_sbd_service_enable_disable.changed | d(false)) + or (__ha_cluster_distribute_sbd_config.changed | d(false)) + or (__ha_cluster_qdevice_certs.changed | d(false)) + - > + item != 'corosync-qdevice' + or 'corosync-qdevice.service' in ansible_facts.services + + +# We must always start daemons to get the cluster running on newly added nodes. + +- name: Start corosync + ansible.builtin.service: + name: corosync + state: started + +# To prevent corner cases, always reload the config. It is sufficient to run +# the reload command on one node. Corosync then reloads config on all cluster +# nodes. If there was no change in corosync.conf, the reload is an empty +# operation. 
+- name: Reload corosync configuration + ansible.builtin.command: + cmd: corosync-cfgtool -R + run_once: true # noqa: run_once[task] + when: not ansible_check_mode + changed_when: false + +- name: Start corosync-qdevice + ansible.builtin.service: + name: corosync-qdevice + state: started + when: __ha_cluster_qdevice_in_use + +- name: Start pacemaker + ansible.builtin.service: + name: pacemaker + state: started + +# crm cluster should be started already by previous service start +- name: Start pacemaker cluster + ansible.builtin.command: + cmd: crm cluster start + changed_when: true + +- name: Create cluster query string + ansible.builtin.set_fact: + __ha_cluster_node_count: + "{{ ansible_play_hosts_all | length }} nodes configured" + +- name: Wait for the cluster to show all cluster nodes + ansible.builtin.command: + cmd: crm status + run_once: true # noqa: run_once[task] + when: not ansible_check_mode + changed_when: false + register: __ha_cluster_crm_output_nodes + until: __ha_cluster_crm_output_nodes.stdout is regex(__ha_cluster_node_count) + # 2 minutes retry loop for cluster to initialize + retries: 12 + delay: 10 + timeout: 120 + +- name: Wait for the cluster to show Online nodes + ansible.builtin.command: + cmd: crm status + run_once: true # noqa: run_once[task] + when: not ansible_check_mode + changed_when: false + register: __ha_cluster_crm_output_online + until: __ha_cluster_crm_output_online.stdout is regex("Online:") + # 2 minutes retry loop for cluster nodes to come up + retries: 12 + delay: 10 + timeout: 120 + +- name: Output current cluster status + ansible.builtin.debug: + var: __ha_cluster_crm_output_online.stdout_lines + +- name: Filter out crm status output + ansible.builtin.set_fact: + __ha_cluster_crm_status_online: + "{{ __ha_cluster_crm_output_online.stdout_lines | + select('search', 'Online:') | list }}" + +- name: Fail if one of nodes is in Offline status + ansible.builtin.fail: + msg: "Cluster start failed and one or more nodes are Offline. {{ item }}" + loop: "{{ ansible_play_hosts_all }}" + when: + - __ha_cluster_crm_status_online is defined + - item not in __ha_cluster_crm_status_online | join(' ') diff --git a/tasks/shell_crmsh/configure-shell.yml b/tasks/shell_crmsh/configure-shell.yml new file mode 100644 index 00000000..3bccd43a --- /dev/null +++ b/tasks/shell_crmsh/configure-shell.yml @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: MIT +--- +# Placeholder for potential configure tasks for crmsh +# There are no configure steps for crmsh currently. diff --git a/tasks/shell_crmsh/create-and-push-cib.yml b/tasks/shell_crmsh/create-and-push-cib.yml new file mode 100644 index 00000000..52c0e539 --- /dev/null +++ b/tasks/shell_crmsh/create-and-push-cib.yml @@ -0,0 +1,290 @@ +# SPDX-License-Identifier: MIT +--- +# We always need to create CIB to see whether it's the same as what is already +# present in the cluster. However, we don't want to report it as a change since +# the only thing which matters is pushing the resulting CIB to the cluster. 
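+#
+# Rough command flow driven by the tasks below (a sketch only; <shadow> stands
+# for __ha_cluster_crm_shadow and original.xml/new.xml/patch.xml stand for the
+# tempfiles created further down):
+#   crm configure property maintenance-mode=true      # freeze CIB changes
+#   cibadmin --query                                   # snapshot into original.xml and new.xml
+#   crm configure cib new <shadow> --force             # shadow CIB based on the cluster
+#   crm -c <shadow> configure ...                      # properties, resources, constraints
+#   cp /var/lib/pacemaker/cib/shadow.<shadow> new.xml  # shadow becomes the "new" CIB
+#   crm_diff --no-version --original original.xml --new new.xml > patch.xml
+#   cibadmin --verbose --patch --xml-file patch.xml    # only when crm_diff reports a change
+#   crm configure property maintenance-mode=false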
+ +# Create backup of current cib in /root +- name: Gather facts for ansible_date_time + ansible.builtin.setup: + filter: + - 'ansible_date_time' + +# Prepare CIB files +- name: Create a tempfile for original CIB + ansible.builtin.tempfile: + state: file + suffix: _ha_cluster_original_cib_xml + register: __ha_cluster_tempfile_original_cib_xml + check_mode: false + changed_when: not ansible_check_mode + +- name: Create a tempfile for new CIB + ansible.builtin.tempfile: + state: file + suffix: _ha_cluster_cib_xml + register: __ha_cluster_tempfile_cib_xml + check_mode: false + changed_when: not ansible_check_mode + +# Maintenance mode is required, because CIB version changes with cluster +# status changes, resulting in shadow CIB outdated and unable to patch. +# Sleep is implemented to ensure that cluster have enough time to freeze +# to ensure CIB export consistency. +# Meta-attrs is-managed will conflict with maintenance mode. Option n +# will skip their deletion. +- name: Put cluster in maintenance mode to freeze cib changes + ansible.builtin.expect: + command: crm configure property maintenance-mode=true + responses: + ".*is-managed.*": "n" + run_once: true # noqa: run_once[task] + check_mode: false + changed_when: true + +- name: Verify that maintenace-mode is true + ansible.builtin.command: + cmd: crm status + register: __ha_cluster_crm_status_maint + retries: 10 + delay: 5 + until: + '"Resource management is DISABLED" in __ha_cluster_crm_status_maint.stdout' + check_mode: false + changed_when: false + run_once: true # noqa: run_once[task] + +- name: Fetch CIB configuration + ansible.builtin.command: + cmd: cibadmin --query + register: __ha_cluster_fetch_cib + check_mode: false + changed_when: false # this is a read-only command + +- name: Write CIB configuration + ansible.builtin.copy: + content: "{{ __ha_cluster_fetch_cib.stdout }}" + dest: "{{ item }}" + owner: root + group: root + mode: '0600' + loop: + - "{{ __ha_cluster_tempfile_cib_xml.path }}" + - "{{ __ha_cluster_tempfile_original_cib_xml.path }}" + check_mode: false + changed_when: not ansible_check_mode + +# Starting with an empty CIB would remove all nodes and other parts of CIB +# automatically created by pacemaker. That would effectively make the role to +# report changed == True every time. Therefore, we start with the current +# cluster CIB and purge it instead, thus keeping all the automatically created +# parts in place and report only actual configuration changes. +- name: Purge new CIB configuration + ansible.builtin.command: + cmd: > + cibadmin --force --delete-all --xpath + '/cib/configuration/*[not( + self::crm_config or + self::nodes or + self::resources or + self::constraints + )] + | /cib/configuration/*[self::resources or self::constraints]/* + | /cib/configuration/nodes/*/* + | /cib/configuration/crm_config//nvpair[not( + @name="cluster-infrastructure" or + @name="cluster-name" or + @name="dc-version" or + @name="have-watchdog" or + @name="last-lrm-refresh" or + @name="stonith-watchdog-timeout" + )]' + environment: + CIB_file: "{{ __ha_cluster_tempfile_cib_xml.path }}" + check_mode: false + changed_when: not ansible_check_mode + run_once: true # noqa: run_once[task] + +# Create/Replace shadow configuration with new based on current cluster. 
+- name: Create new shadow crm configuration with force + ansible.builtin.command: + cmd: crm configure cib new {{ __ha_cluster_crm_shadow }} --force + check_mode: false + changed_when: not ansible_check_mode + +# Build the new CIB +- name: Build the new CIB + block: + ## Cluster properties + - name: Configure cluster properties + ansible.builtin.include_tasks: crm-cluster-properties.yml + vars: + properties_set: "{{ ha_cluster_cluster_properties[0] }}" + when: ha_cluster_cluster_properties[0].attrs | d([]) + + ## Resources + - name: Configure cluster resources + ansible.builtin.include_tasks: crm-cib-resource-primitive.yml + vars: + resource: "{{ item }}" + loop: "{{ ha_cluster_resource_primitives }}" + + - name: Configure cluster resource groups + ansible.builtin.include_tasks: crm-cib-resource-group.yml + vars: + resource_group: "{{ item }}" + loop: "{{ ha_cluster_resource_groups }}" + + - name: Configure cluster resource clones + ansible.builtin.include_tasks: crm-cib-resource-clone.yml + vars: + resource_clone: "{{ item }}" + loop: "{{ ha_cluster_resource_clones }}" + + ## Stonith levels - fencing_topology + - name: Configure stonith levels - fencing_topology + include_tasks: crm-cib-stonith-level.yml + when: ha_cluster_stonith_levels + + ## Constraints + - name: Configure resource location constraints + ansible.builtin.include_tasks: crm-cib-constraint-location.yml + loop: "{{ ha_cluster_constraints_location }}" + loop_control: + index_var: constraint_index + loop_var: constraint + + - name: Configure resource colocation constraints + ansible.builtin.include_tasks: crm-cib-constraint-colocation.yml + when: not constraint.resource_sets | d() + loop: "{{ ha_cluster_constraints_colocation }}" + loop_control: + index_var: constraint_index + loop_var: constraint + + - name: Configure resource set colocation constraints + ansible.builtin.include_tasks: crm-cib-constraint-set.yml + vars: + constraint_type: colocation + when: constraint.resource_sets | d() + loop: "{{ ha_cluster_constraints_colocation }}" + loop_control: + index_var: constraint_index + loop_var: constraint + + - name: Configure resource order constraints + ansible.builtin.include_tasks: crm-cib-constraint-order.yml + when: not constraint.resource_sets | d() + loop: "{{ ha_cluster_constraints_order }}" + loop_control: + index_var: constraint_index + loop_var: constraint + + - name: Configure resource set order constraints + ansible.builtin.include_tasks: crm-cib-constraint-set.yml + vars: + constraint_type: order + when: constraint.resource_sets | d() + loop: "{{ ha_cluster_constraints_order }}" + loop_control: + index_var: constraint_index + loop_var: constraint + + - name: Configure resource ticket constraints + ansible.builtin.include_tasks: crm-cib-constraint-ticket.yml + when: not constraint.resource_sets | d() + loop: "{{ ha_cluster_constraints_ticket }}" + loop_control: + index_var: constraint_index + loop_var: constraint + + - name: Configure resource set ticket constraints + ansible.builtin.include_tasks: crm-cib-constraint-set.yml + vars: + constraint_type: ticket + when: constraint.resource_sets | d() + loop: "{{ ha_cluster_constraints_ticket }}" + loop_control: + index_var: constraint_index + loop_var: constraint + +# Push the new CIB into the cluster +- name: Copy shadow cib to temp + ansible.builtin.copy: + src: "/var/lib/pacemaker/cib/shadow.{{ __ha_cluster_crm_shadow }}" + dest: "{{ __ha_cluster_tempfile_cib_xml.path }}" + owner: root + group: root + mode: '0600' + remote_src: true + check_mode: false + 
changed_when: not ansible_check_mode + +- name: Create a tempfile for CIB diff + ansible.builtin.tempfile: + state: file + suffix: _ha_cluster_cib_diff + register: __ha_cluster_tempfile_cib_diff + check_mode: false + changed_when: not ansible_check_mode + +- name: Compare new and original CIB + ansible.builtin.command: + cmd: > + crm_diff --no-version + --original {{ __ha_cluster_tempfile_original_cib_xml.path }} + --new {{ __ha_cluster_tempfile_cib_xml.path }} + register: __ha_cluster_cib_diff + check_mode: false + changed_when: not ansible_check_mode + failed_when: + - __ha_cluster_cib_diff.rc != 0 # success, CIBs are the same + - __ha_cluster_cib_diff.rc != 1 # success, CIBs are not the same + run_once: true # noqa: run_once[task] + +- name: Write CIB diff to its tempfile + ansible.builtin.copy: + content: "{{ __ha_cluster_cib_diff.stdout }}" + dest: "{{ __ha_cluster_tempfile_cib_diff.path }}" + owner: root + group: root + mode: '0600' + check_mode: false + changed_when: not ansible_check_mode + when: __ha_cluster_cib_diff.rc == 1 + +# crm_diff is able to recognize same resources and constraints regardless if +# they were re-created and patch will not be executed when re-running. +- name: Push CIB diff to the cluster if it has any changes + ansible.builtin.command: + cmd: > + cibadmin --verbose --patch + --xml-file {{ __ha_cluster_tempfile_cib_diff.path | quote }} + register: __ha_cluster_cib_path_out + changed_when: not ansible_check_mode + failed_when: __ha_cluster_cib_path_out.rc != 0 + ignore_errors: true + when: __ha_cluster_cib_diff.rc == 1 + run_once: true # noqa: run_once[task] + +# Meta-attrs is-managed will conflict with maintenance mode. Option n +# will skip their deletion. +- name: Disable maintenance mode + ansible.builtin.expect: + command: crm configure property maintenance-mode=false + responses: + ".*is-managed.*": "n" + check_mode: false + changed_when: true + run_once: true # noqa: run_once[task] + +- name: Remove CIB tempfiles + ansible.builtin.file: + path: "{{ item.path }}" + state: absent + loop: + - "{{ __ha_cluster_tempfile_cib_xml }}" + - "{{ __ha_cluster_tempfile_original_cib_xml }}" + - "{{ __ha_cluster_tempfile_cib_diff }}" + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/crm-cib-constraint-colocation.yml b/tasks/shell_crmsh/crm-cib-constraint-colocation.yml new file mode 100644 index 00000000..d4771abc --- /dev/null +++ b/tasks/shell_crmsh/crm-cib-constraint-colocation.yml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: MIT +--- +- name: Define colocation constraint.id '{{ + constraint.id | d(constraint_index) }}' + ansible.builtin.set_fact: + __ha_cluster_constraint_id: + "{{ constraint.id if constraint.id is defined else + (constraint.resource_leader.id | quote) + '-colocation' }}" + +# Verify if Shadow CIB already contains same constraint id. +- name: Verify colocation constraint presence {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure show {{ __ha_cluster_constraint_id }} + register: __ha_cluster_constraint_status + changed_when: false + failed_when: false + +# Delete constraint id in Shadow CIB to avoid errors during cibadmin patch. 
+- name: Delete present colocation constraint {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure delete {{ __ha_cluster_constraint_id }} + when: __ha_cluster_constraint_status.rc == 0 + check_mode: false + changed_when: not ansible_check_mode + +- name: Configure colocation constraint {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure colocation {{ __ha_cluster_constraint_id }} + {% for option in constraint.options | d([]) if option.name == 'score' %} + {{ option.value | lower | replace('infinity', 'inf') | quote }}: + {% else %} + inf: + {% endfor %} + {{ constraint.resource_leader.id + | quote }} {{ constraint.resource_follower.id | quote }} + {% for option in constraint.options | d([]) if option.name != 'score' %} + {{ option.name | quote }}={{ option.value | quote }} + {% endfor %} + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/crm-cib-constraint-location.yml b/tasks/shell_crmsh/crm-cib-constraint-location.yml new file mode 100644 index 00000000..de56ed2e --- /dev/null +++ b/tasks/shell_crmsh/crm-cib-constraint-location.yml @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: MIT +--- +- name: Define location constraint.id {{ constraint.id | d(constraint_index) }} + ansible.builtin.set_fact: + __ha_cluster_constraint_id: + "{{ constraint.id if constraint.id is defined + else (constraint.resource.pattern | regex_replace('[^A-Za-z0-9]', '') + | quote) + '-location' + if constraint.resource.pattern is defined + else (constraint.resource.id | quote) + '-location' }}" + +# Verify if Shadow CIB already contains same constraint id. +- name: Verify location constraint presence {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure show {{ __ha_cluster_constraint_id }} + register: __ha_cluster_constraint_status + changed_when: false + failed_when: false + +# Delete constraint id in Shadow CIB to avoid errors during cibadmin patch. 
+- name: Delete present location constraint {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure delete {{ __ha_cluster_constraint_id }} + when: __ha_cluster_constraint_status.rc == 0 + check_mode: false + changed_when: not ansible_check_mode + +- name: Configure location constraint {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure location {{ __ha_cluster_constraint_id }} + {% if constraint.resource.pattern | d() %} + /{{ constraint.resource.pattern | quote }}/ + {% else %} + {{ constraint.resource.id | quote }} + {% endif %}\ + rule + {% for option in constraint.options | d([]) if option.name == 'score' %} + {{ option.value | lower | replace('infinity', 'inf') | quote }}: + {% else %} + inf: + {% endfor %} + {% if constraint.rule | d() %} + {{ constraint.rule }} + {% else %} + '\'#uname eq {{ constraint.node }} + {% endif %} + {% for option in constraint.options | d([]) if option.name != 'score' %} + {{ option.name | quote }}={{ option.value | quote }} + {% endfor %} + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/crm-cib-constraint-order.yml b/tasks/shell_crmsh/crm-cib-constraint-order.yml new file mode 100644 index 00000000..3b83fcd2 --- /dev/null +++ b/tasks/shell_crmsh/crm-cib-constraint-order.yml @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: MIT +--- +- name: Define order constraint.id {{ constraint.id | d(constraint_index) }} + ansible.builtin.set_fact: + __ha_cluster_constraint_id: + "{{ constraint.id if constraint.id is defined else + (constraint.resource_first.id | quote) + '-order' }}" + +# Verify if Shadow CIB already contains same constraint id. +- name: Verify order constraint presence {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure show {{ __ha_cluster_constraint_id }} + register: __ha_cluster_constraint_status + changed_when: false + failed_when: false + +# Delete constraint id in Shadow CIB to avoid errors during cibadmin patch. 
+- name: Delete present order constraint {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure delete {{ __ha_cluster_constraint_id }} + when: __ha_cluster_constraint_status.rc == 0 + check_mode: false + changed_when: not ansible_check_mode + +- name: Configure order constraint {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure order {{ __ha_cluster_constraint_id | quote }} + {{ constraint.resource_first.id + | quote }}:{{ constraint.resource_first.action | quote }} + {{ constraint.resource_then.id + | quote }}:{{ constraint.resource_then.action | quote }} + {% for option in constraint.options | d([]) if option.name != 'score' %} + {{ option.name | quote }}={{ option.value | quote }} + {% endfor %} + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/crm-cib-constraint-set.yml b/tasks/shell_crmsh/crm-cib-constraint-set.yml new file mode 100644 index 00000000..04fc2b89 --- /dev/null +++ b/tasks/shell_crmsh/crm-cib-constraint-set.yml @@ -0,0 +1,70 @@ +# SPDX-License-Identifier: MIT +--- +- name: Define {{ constraint_type }} set constraint.id '{{ + constraint_index | string }}' # noqa name[template] + ansible.builtin.set_fact: + __ha_cluster_constraint_id: + "{{ constraint.id if constraint.id is defined else + constraint_type + '-set-' + constraint_index | string }}" + +# Verify if Shadow CIB already contains same constraint id. +- name: Verify constraint set presence {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure show {{ __ha_cluster_constraint_id }} + register: __ha_cluster_resource_status + changed_when: false + failed_when: false + +# Delete constraint id in Shadow CIB to avoid errors during cibadmin patch. 
+- name: Delete present constraint set {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure delete {{ __ha_cluster_constraint_id }} + when: __ha_cluster_resource_status.rc == 0 + check_mode: false + changed_when: not ansible_check_mode + +- name: Configure {{ constraint_type }} constraint set '{{ + __ha_cluster_constraint_id }}' # noqa name[template] + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} configure + {{ constraint_type if constraint_type != 'ticket' else 'rsc_ticket' }} + {{ __ha_cluster_constraint_id }} + {% if constraint_type != 'ticket' %} + {% if constraint_type == 'order' %} + {% for option in constraint.options | d([]) + if option.name == 'kind' %} + {{ option.value | quote }}: + {% else %} + Mandatory: + {% endfor %} + {% elif constraint_type == 'colocation' %} + {% for option in constraint.options | d([]) + if option.name == 'score' %} + {{ option.value | lower | replace('infinity', 'inf') | quote }}: + {% else %} + inf: + {% endfor %} + {% endif %} + {% for set in constraint.resource_sets %} + ({% for resource in set.resource_ids %} + {{ resource | quote }} + {% endfor %}) + {% endfor %} + {% else %} + {% for set in constraint.resource_sets %} + {{ constraint.ticket | quote }}: + {% for resource in set.resource_ids %} + {{ resource | quote }} + {% endfor %} + {% endfor %} + {% endif %} + {% for option in constraint.options | d([]) %} + {{ option.name | quote }}={{ option.value | quote }} + {% endfor %} + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/crm-cib-constraint-ticket.yml b/tasks/shell_crmsh/crm-cib-constraint-ticket.yml new file mode 100644 index 00000000..2501e4dd --- /dev/null +++ b/tasks/shell_crmsh/crm-cib-constraint-ticket.yml @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: MIT +--- +- name: Define ticket constraint.id {{ constraint.id | d(constraint_index) }} + ansible.builtin.set_fact: + __ha_cluster_constraint_id: + "{{ constraint.id if constraint.id is defined else + (constraint.resource.id | quote) + '-ticket' }}" + +# Verify if Shadow CIB already contains same constraint id. +- name: Verify ticket constraint presence {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure show {{ __ha_cluster_constraint_id }} + register: __ha_cluster_constraint_status + changed_when: false + failed_when: false + +# Delete constraint id in Shadow CIB to avoid errors during cibadmin patch. 
+- name: Delete present ticket constraint {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure delete {{ __ha_cluster_constraint_id }} + when: __ha_cluster_constraint_status.rc == 0 + check_mode: false + changed_when: not ansible_check_mode + +- name: Configure ticket constraint {{ __ha_cluster_constraint_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure rsc_ticket {{ __ha_cluster_constraint_id }} + {{ constraint.ticket | quote }}: {{ constraint.resource.id | quote }} + {% for option in constraint.options | d([]) %} + {{ option.name | quote }}={{ option.value | quote }} + {% endfor %} + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/crm-cib-resource-clone.yml b/tasks/shell_crmsh/crm-cib-resource-clone.yml new file mode 100644 index 00000000..d633dc87 --- /dev/null +++ b/tasks/shell_crmsh/crm-cib-resource-clone.yml @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: MIT +--- +- name: Define resouce clone resource_clone.id '{{ + resource_clone.id | d(resource_clone.resource_id + '-clone') }}' + ansible.builtin.set_fact: + __ha_cluster_resource_id: + "{{ resource_clone.id if resource_clone.id is defined + else resource_clone.resource_id + '-clone' }}" + +# Verify if Shadow CIB already contains same resource id. +- name: Verify resouce clone presence {{ __ha_cluster_resource_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure show {{ __ha_cluster_resource_id }} + register: __ha_cluster_resource_status + changed_when: false + failed_when: false + +# Delete resource id in Shadow CIB to avoid errors during cibadmin patch. +- name: Delete present resouce clone {{ __ha_cluster_resource_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure delete {{ __ha_cluster_resource_id }} + when: __ha_cluster_resource_status.rc == 0 + check_mode: false + changed_when: not ansible_check_mode + +- name: Configure resource clone {{ __ha_cluster_resource_id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure clone {{ __ha_cluster_resource_id }} + {{ resource_clone.resource_id | quote }} \ + {% if resource_clone.meta_attrs[0].attrs | default(False) %} + meta + {% for attr in resource_clone.meta_attrs[0].attrs -%} + {{ attr.name | quote }}={{ attr.value | quote }} + {% endfor %} + {% endif %} + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/crm-cib-resource-group.yml b/tasks/shell_crmsh/crm-cib-resource-group.yml new file mode 100644 index 00000000..d96819ff --- /dev/null +++ b/tasks/shell_crmsh/crm-cib-resource-group.yml @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: MIT +--- +# Verify if Shadow CIB already contains same resource id. +- name: Verify resource group presence {{ resource_group.id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure show {{ resource_group.id }} + register: __ha_cluster_resource_status + changed_when: false + failed_when: false + +# Delete resource id in Shadow CIB to avoid errors during cibadmin patch. 
+- name: Delete present resource group {{ resource_group.id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure delete {{ resource_group.id }} + when: __ha_cluster_resource_status.rc == 0 + check_mode: false + changed_when: not ansible_check_mode + +- name: Configure resource group {{ resource_group.id }} + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} configure group + {{ resource_group.id | quote }} + {% for resource in resource_group.resource_ids %} + {{ resource | quote }} + {% endfor %} \ + {% if resource_group.meta_attrs[0].attrs | default(False) %} + meta + {% for attr in resource_group.meta_attrs[0].attrs -%} + {{ attr.name | quote }}={{ attr.value | quote }} + {% endfor %} + {% endif %} + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/crm-cib-resource-primitive.yml b/tasks/shell_crmsh/crm-cib-resource-primitive.yml new file mode 100644 index 00000000..a462469f --- /dev/null +++ b/tasks/shell_crmsh/crm-cib-resource-primitive.yml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: MIT +--- +# Verify if Shadow CIB already contains same resource id. +- name: Verify resource primitive presence {{ resource.id }} + ansible.builtin.command: + cmd: crm -c {{ __ha_cluster_crm_shadow }} configure show {{ resource.id }} + register: __ha_cluster_resource_status + changed_when: false + failed_when: false + +# Delete resource id in Shadow CIB to avoid errors during cibadmin patch. +- name: Delete present resource primitive {{ resource.id }} + ansible.builtin.command: + cmd: crm -c {{ __ha_cluster_crm_shadow }} configure delete {{ resource.id }} + when: __ha_cluster_resource_status.rc == 0 + check_mode: false + changed_when: not ansible_check_mode + +- name: Configure resource primitive {{ resource.id }} + ansible.builtin.command: # noqa jinja[spacing] + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} configure primitive + {{ resource.id | quote }} {{ resource.agent | quote }} \ + {% if resource.instance_attrs[0].attrs | default(False) %} + params + {% for attr in resource.instance_attrs[0].attrs -%} + {{ attr.name | quote }}={{ attr.value | quote }} + {% endfor %} + {% endif %}\ + {% if resource.meta_attrs[0].attrs | default(False) %} + meta + {% for attr in resource.meta_attrs[0].attrs -%} + {{ attr.name | quote }}={{ attr.value | quote }} + {% endfor %} + {% endif %} + {% if resource.operations | default(False) %}\ + {% for operation in resource.operations %} + {% if operation.action | default(False) %} + op {{ operation.action | quote }} + {% if operation.attrs | default(False) %} + {%- for attr in operation.attrs -%} + {{ attr.name | quote }}={{ attr.value | quote }} + {% endfor %} + {% if not loop.last %}\ + {% endif %} + {% endif %} + {% endif %} + {% endfor %} + {% endif %} + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/crm-cib-stonith-level.yml b/tasks/shell_crmsh/crm-cib-stonith-level.yml new file mode 100644 index 00000000..7888da71 --- /dev/null +++ b/tasks/shell_crmsh/crm-cib-stonith-level.yml @@ -0,0 +1,35 @@ +# SPDX-License-Identifier: MIT +--- +# Verify if Shadow CIB already contains fencing_topology. +# crmsh has only one object fencing_topology so all levels are created together. 
+- name: Verify if fencing_topology is already present + ansible.builtin.command: + cmd: crm -c {{ __ha_cluster_crm_shadow }} configure show fencing_topology + register: __ha_cluster_resource_status + changed_when: false + failed_when: false + +# Delete fencing_topology in Shadow CIB to avoid errors during cibadmin patch. +- name: Delete clone if it is already present + ansible.builtin.command: + cmd: crm -c {{ __ha_cluster_crm_shadow }} configure delete fencing_topology + when: __ha_cluster_resource_status.rc == 0 + check_mode: false + changed_when: not ansible_check_mode + +- name: Configure fencing_topology + ansible.builtin.command: + cmd: > + crm -c {{ __ha_cluster_crm_shadow }} configure fencing_topology + {% for stonith_level in ha_cluster_stonith_levels -%} + {% if stonith_level.target | d() %} + {{ stonith_level.target | quote }}: + {% elif stonith_level.target_pattern | d() %} + regexp%{{ stonith_level.target_pattern | quote }}: + {% endif %} + {% for resource_id in stonith_level.resource_ids %} + {{ resource_id | quote }} + {% endfor %} + {% endfor %} + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/crm-cluster-properties.yml b/tasks/shell_crmsh/crm-cluster-properties.yml new file mode 100644 index 00000000..db28b3d8 --- /dev/null +++ b/tasks/shell_crmsh/crm-cluster-properties.yml @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: MIT +--- +- name: Configure cluster properties set + ansible.builtin.command: + cmd: | + crm -c {{ __ha_cluster_crm_shadow }} + configure property {{ item.name | quote }}={{ item.value | quote }} + loop: "{{ properties_set.attrs }}" + # Pause ensures that cluster is consistent for further property changes. + # Setting up crm properties without pause resulted in unstable cluster. + loop_control: + pause: 5 + retries: 10 + check_mode: false + changed_when: not ansible_check_mode diff --git a/tasks/shell_crmsh/pcs-qnetd.yml b/tasks/shell_crmsh/pcs-qnetd.yml new file mode 100644 index 00000000..a027ed9a --- /dev/null +++ b/tasks/shell_crmsh/pcs-qnetd.yml @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: MIT +--- +# Placeholder for qnetd/qdevice steps for crmsh. +# File will need to be renamed in main.yml. diff --git a/tasks/shell_crmsh/sbd.yml b/tasks/shell_crmsh/sbd.yml new file mode 100644 index 00000000..6a77cfcd --- /dev/null +++ b/tasks/shell_crmsh/sbd.yml @@ -0,0 +1,146 @@ +# SPDX-License-Identifier: MIT +--- +- name: Manage SBD + when: ha_cluster_sbd_enabled + block: + - name: Check if watchdog is configured when use_softdog is false + ansible.builtin.stat: + path: '/dev/watchdog' + register: __ha_cluster_watchdog_check + + - name: Assert that watchdog is configured when use_softdog is false + ansible.builtin.assert: + that: __ha_cluster_watchdog_check.stat.exists + fail_msg: >- + The role has been called with the 'ha_cluster.sbd_softdog' option set + to false. This expects the device '/dev/watchdog' to be available but + '/dev/watchdog' was not found. You must configure the watchdog before + running the role. If no watchdog is available, set + 'ha_cluster.sbd_softdog' to 'true' and the softdog will be + automatically configured. + success_msg: >- + The watchdog device was found. 
+ when: + - "'softdog' not in ha_cluster.sbd_watchdog_modules" + + - name: Configure SBD watchdog + block: + - name: Configure and unload watchdog kernel modules from blocklist + block: + - name: Configure watchdog kernel module blocklist + ansible.builtin.lineinfile: + path: "/etc/modprobe.d/{{ item }}.conf" + create: true + mode: '0644' + # wokeignore:rule=blacklist + regexp: "^(options|blacklist) {{ item }}" + # wokeignore:rule=blacklist + line: "blacklist {{ item }}" + state: present + loop: "{{ ha_cluster.sbd_watchdog_modules_blocklist | d([]) }}" + + - name: Unload watchdog kernel modules from blocklist + community.general.modprobe: + name: "{{ item }}" + state: absent + loop: "{{ ha_cluster.sbd_watchdog_modules_blocklist | d([]) }}" + + - name: Configure and load watchdog kernel module + block: + - name: Configure watchdog kernel modules + ansible.builtin.lineinfile: + path: "/etc/modules-load.d/{{ item }}.conf" + create: true + mode: '0644' + regexp: "^{{ item }}" + line: "{{ item }}" + state: present + loop: "{{ ha_cluster.sbd_watchdog_modules | d([]) }}" + + - name: Load watchdog kernel modules + community.general.modprobe: + name: "{{ item }}" + state: present + loop: "{{ ha_cluster.sbd_watchdog_modules | d([]) }}" + + - name: Manage SBD devices + # Ideally, the block as a whole should run one node at a time. This does + # not seem to be possible with Ansible yet. Instead, we at least make the + # block's tasks run one by one. This way, we avoid possible issues caused + # by initializing one device from multiple host at the same time. Devices + # initialized before the role started will not be reinitialized. Devices + # not initialized before the role started will be initialized as many + # times as there are nodes. That, however, has no other side effect than + # suboptimal performance of the role. + throttle: 1 + block: + - name: Probe SBD devices + ansible.builtin.command: + cmd: sbd -d {{ item | quote }} dump + loop: "{{ ha_cluster.sbd_devices | d([]) }}" + register: __ha_cluster_check_sbd_devices_result + changed_when: false + # return_code == 0 means the disk is initialized already + # return_code != 0 means the disk is not initialized yet + failed_when: false + # This command doesn't do any changes and so can safely be executed + # even in check_mode. 
+ check_mode: false + + - name: Initialize SBD devices + ansible.builtin.command: + cmd: sbd -d {{ item.item | quote }} -1 60 -4 120 create + changed_when: true + loop: "{{ __ha_cluster_check_sbd_devices_result.results }}" + when: item.rc != 0 + + - name: Distribute SBD config + ansible.builtin.template: + src: templates/sbd + dest: /etc/sysconfig/sbd + owner: root + group: root + mode: '0644' + vars: + options: "{{ ha_cluster_sbd_options | d([]) }}" + node_name: "{{ __ha_cluster_node_name }}" + node_watchdog: "{{ ha_cluster.sbd_watchdog | d('/dev/watchdog') }}" + node_devices: "{{ ha_cluster.sbd_devices | d([]) }}" + register: __ha_cluster_distribute_sbd_config + + - name: Configure systemd timeout for SBD + vars: + __sbd_delay_start: "{{ ha_cluster_sbd_options + | selectattr('name', 'match', '^delay-start$') + | map(attribute='value') | list }}" + when: + - __sbd_delay_start | length > 0 + - __sbd_delay_start | first | int > 0 + block: + - name: Ensure /etc/systemd/system/sbd.service.d directory exists + ansible.builtin.file: + path: /etc/systemd/system/sbd.service.d + state: directory + owner: root + group: root + mode: '0755' + + - name: Override start timeout for SBD + ansible.builtin.template: + src: templates/override-timeout.conf + dest: /etc/systemd/system/sbd.service.d/override-timeout.conf + owner: root + group: root + mode: '0644' + vars: + # Make sure the timeout is at least the default 90 seconds. + # The intent is to make the timeout longer if needed, not shorter. + # yamllint disable rule:line-length + timeout_value: "{{ [90, + (__sbd_delay_start | first | float * 1.2) | round(0, 'ceil') | + int] | max }}" + # yamllint enable rule:line-length + + - name: Reload systemd service files + ansible.builtin.systemd: + daemon_reload: true diff --git a/tasks/shell_pcs/pcs-auth-pcs-0.10.yml b/tasks/shell_pcs/cluster-auth-pcs-0.10.yml similarity index 100% rename from tasks/shell_pcs/pcs-auth-pcs-0.10.yml rename to tasks/shell_pcs/cluster-auth-pcs-0.10.yml diff --git a/tasks/shell_pcs/pcs-auth.yml b/tasks/shell_pcs/cluster-auth.yml similarity index 94% rename from tasks/shell_pcs/pcs-auth.yml rename to tasks/shell_pcs/cluster-auth.yml index 516ec095..d0ecc827 100644 --- a/tasks/shell_pcs/pcs-auth.yml +++ b/tasks/shell_pcs/cluster-auth.yml @@ -21,7 +21,7 @@ - __ha_cluster_pcs_auth_status.rc != 2 - name: Run pcs auth - include_tasks: pcs-auth-{{ __ha_cluster_pcs_provider }}.yml + include_tasks: cluster-auth-{{ __ha_cluster_pcs_provider }}.yml # Include the tasks for nodes which can not talk to other nodes due to # missing pcs auth tokens. 
The pcs-auth-*.yml contains "run_once: true" as # running the auth on one node is sufficient (auth tokens are distributed by diff --git a/tasks/shell_pcs/pcs-configure-pcs-pcsd.yml b/tasks/shell_pcs/configure-shell.yml similarity index 100% rename from tasks/shell_pcs/pcs-configure-pcs-pcsd.yml rename to tasks/shell_pcs/configure-shell.yml diff --git a/templates/crmsh_corosync.j2 b/templates/crmsh_corosync.j2 new file mode 100644 index 00000000..af4f27f1 --- /dev/null +++ b/templates/crmsh_corosync.j2 @@ -0,0 +1,51 @@ +{{ ansible_managed | comment }} +{{ "system_role:ha_cluster" | comment(prefix="", postfix="") }} +totem { + version: {{ ha_cluster_corosync_params.totem.version | default(2) }} + token: {{ ha_cluster_corosync_params.totem.token | default(30000) }} + consensus: {{ ha_cluster_corosync_params.totem.consensus | default(36000) }} + token_retransmits_before_loss_const: {{ ha_cluster_corosync_params.totem.token_retransmits_before_loss_const | default(6) }} + secauth: {{ ha_cluster_corosync_params.totem.secauth | default("on") }} + crypto_cipher: {{ ha_cluster_corosync_params.totem.crypto_cipher | default("aes256") }} + crypto_hash: {{ ha_cluster_corosync_params.totem.crypto_hash | default("sha1") }} + clear_node_high_bit: {{ ha_cluster_corosync_params.totem.clear_node_high_bit | default("yes") }} + rrp_mode: {{ ha_cluster_corosync_params.totem.rrp_mode | default("passive") }} + + interface { + ringnumber: 0 + bindnetaddr: {{ ansible_facts['eth0']['ipv4']['address'] }} + mcastport: 5405 + ttl: 1 + } + transport: udpu +} +logging { + fileline: {{ ha_cluster_corosync_params.logging.fileline | default("off") }} + to_logfile: {{ ha_cluster_corosync_params.logging.to_logfile | default("yes") }} + to_syslog: {{ ha_cluster_corosync_params.logging.to_syslog | default("yes") }} + logfile: /var/log/cluster/corosync.log + debug: {{ ha_cluster_corosync_params.logging.debug | default("off") }} + timestamp: {{ ha_cluster_corosync_params.logging.timestamp | default("on") }} + logger_subsys { + subsys: QUORUM + debug: off + } +} +nodelist { +{% for host in ansible_play_batch %} + node { + ring0_addr: {{ hostvars[host]['ansible_facts']['eth0']['ipv4']['address'] }} + {% if hostvars[host]['ansible_facts']['eth1']['ipv4']['address'] is defined %} + ring1_addr: {{ hostvars[host]['ansible_facts']['eth1']['ipv4']['address'] }} + {% endif %} + nodeid: {{ loop.index }} + } +{% endfor %} +} +quorum { +# Enable and configure quorum subsystem (default: off) +# see also corosync.conf.5 and votequorum.5 +provider: corosync_votequorum +expected_votes: 2 +two_node: 1 +} \ No newline at end of file diff --git a/vars/RedHat.yml b/vars/RedHat.yml index 77f53ace..4084f660 100644 --- a/vars/RedHat.yml +++ b/vars/RedHat.yml @@ -41,3 +41,10 @@ __ha_cluster_services: - corosync - corosync-qdevice - pacemaker + +# fence agent list for os_family +__ha_cluster_fence_agent_packages_default: "{{ + ['fence-agents-all'] + + + (['fence-virt'] if ansible_architecture == 'x86_64' else []) + }}" diff --git a/vars/Suse.yml b/vars/Suse.yml index 3a8bd452..dc18a4b3 100644 --- a/vars/Suse.yml +++ b/vars/Suse.yml @@ -1,3 +1,43 @@ --- - # Variables for Suse set in ansible_facts['os_family'] +ha_cluster_pacemaker_shell: crmsh + +# Placeholder with pcs name +__ha_cluster_pcs_provider: crm + +__ha_cluster_role_essential_packages: + - 'pacemaker' + - 'corosync' + - 'crmsh' + - 'fence-agents' + - 'ha-cluster-bootstrap' + - 'patterns-ha-ha_sles' + - 'resource-agents' + - 'cluster-glue' + - 'socat' + - 'libxml2-tools' + - 'ClusterTools2' + - 
'rsyslog'
+
+__ha_cluster_fullstack_node_packages: []
+
+__ha_cluster_sbd_packages:
+  - sbd
+  - open-iscsi
+
+__ha_cluster_services:
+  - corosync
+  - corosync-qdevice
+  - pacemaker
+
+# Name of the shadow CIB used by the crmsh create-and-push-cib tasks
+__ha_cluster_crm_shadow: shd
+
+# fence agent list for os_family
+__ha_cluster_fence_agent_packages_default: ['fence-agents']
+
+# Corosync input for the crmsh_corosync.j2 template; empty by default so the
+# template falls back to its built-in defaults
+ha_cluster_corosync_params: {}
+
+# User-facing overrides: the selinux and firewall roles are currently not
+# supported on SUSE
+ha_cluster_manage_firewall: false
+ha_cluster_manage_selinux: false
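For reference, a minimal sketch of a playbook exercising the new crmsh code path follows. The inventory group, password, and resource values are illustrative only, and the role reference may need adjusting to however the role is installed locally; on hosts with the Suse os_family, vars/Suse.yml above already selects ha_cluster_pacemaker_shell: crmsh, so it does not need to be set explicitly.

- name: Configure a basic cluster using the crmsh shell
  hosts: cluster_nodes                           # illustrative inventory group
  vars:
    ha_cluster_cluster_present: true
    ha_cluster_hacluster_password: ChangeMe123   # illustrative only
    ha_cluster_cluster_properties:
      - attrs:
          - name: stonith-enabled
            value: "true"
    ha_cluster_resource_primitives:
      - id: test-dummy
        agent: ocf:pacemaker:Dummy
        operations:
          - action: monitor
            attrs:
              - name: interval
                value: "30s"
  roles:
    - linux-system-roles.ha_cluster              # adjust to the installed role name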
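To make the Jinja2 command assembly in tasks/shell_crmsh/crm-cib-resource-primitive.yml easier to follow: a primitive defined with id: test-dummy, agent: ocf:pacemaker:Dummy and a single monitor operation (interval=30s) is rendered into approximately the following call against the shadow CIB, where shd comes from __ha_cluster_crm_shadow and exact quoting and line continuations differ slightly:

crm -c shd configure primitive test-dummy ocf:pacemaker:Dummy \
  op monitor interval=30s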