Patch summary (free text ahead of the first "diff --git" header; patch tools skip it):
  * impi_pingpong/pingpong.sh: correct the misspelled "#SBACTH" directive to "#SBATCH".
  * ood.yml: no longer lists jetpack8 among the packages excluded from upgrade.
  * roles/pbsserver: drop the plain pbs restart; after the cron task, run pkg_update,
    stop postgresql and pbs, start postgresql then pbs, and finish with a qstat check.
  * roles/pkg_update (Ubuntu): retry the apt upgrade up to 5 times, 15 s apart.
  * roles/slurm + scheduler.yml: the pkg_update call (excluding jetpack8) moves from the
    scheduler play into the slurm role.

diff --git a/playbooks/files/ood_templates/slurm/impi_pingpong/pingpong.sh b/playbooks/files/ood_templates/slurm/impi_pingpong/pingpong.sh
index 6f5dba33..3f4222bb 100644
--- a/playbooks/files/ood_templates/slurm/impi_pingpong/pingpong.sh
+++ b/playbooks/files/ood_templates/slurm/impi_pingpong/pingpong.sh
@@ -5,7 +5,7 @@
 #SBATCH -p hpc
 #SBATCH -t 5
 #SBATCH --export=NONE
-#SBACTH --exclusive
+#SBATCH --exclusive
 
 source /etc/profile.d/modules.sh
 module use /usr/share/Modules/modulefiles
diff --git a/playbooks/ood.yml b/playbooks/ood.yml
index 8d35e373..220db679 100644
--- a/playbooks/ood.yml
+++ b/playbooks/ood.yml
@@ -628,4 +628,4 @@
         apply:
           become: true
       vars:
-        packages_to_exclude_from_upgrade: "{{ (['ondemand','amlfs', 'jetpack8'] if ( lustre.create | default(false)) else ['ondemand', 'jetpack8']) }}"
+        packages_to_exclude_from_upgrade: "{{ (['ondemand','amlfs'] if ( lustre.create | default(false)) else ['ondemand']) }}"
diff --git a/playbooks/roles/pbsserver/tasks/main.yml b/playbooks/roles/pbsserver/tasks/main.yml
index 4e24f8cc..417bef28 100644
--- a/playbooks/roles/pbsserver/tasks/main.yml
+++ b/playbooks/roles/pbsserver/tasks/main.yml
@@ -117,13 +117,39 @@
   args:
     chdir: /opt/cycle/pbspro
 
-- name: Restart pbs-server
-  service:
-    name: pbs
-    state: restarted
-
 - name: create cron entry to remove old accounting files
   cron:
     name: "remove PBS accounting files older than 90 days"
     special_time: daily
     job: "find /var/spool/pbs/server_logs -mtime +90 -type f -print -exec rm {} +"
+
+- name: Update Packages
+  include_role:
+    name: pkg_update
+    apply:
+      become: true
+
+- name: stop postgresql
+  service:
+    name: postgresql
+    state: stopped
+
+- name: stop pbs-server
+  service:
+    name: pbs
+    state: stopped
+
+- name: start postgresql
+  service:
+    name: postgresql
+    state: started
+
+- name: start pbs-server
+  service:
+    name: pbs
+    state: started
+
+# Read-only check: the task fails when qstat exits non-zero and never reports a change.
+- name: check pbs connection
+  command: qstat
+  changed_when: false
diff --git a/playbooks/roles/pkg_update/tasks/Ubuntu.yml b/playbooks/roles/pkg_update/tasks/Ubuntu.yml
index ec65ed1a..5747f459 100644
--- a/playbooks/roles/pkg_update/tasks/Ubuntu.yml
+++ b/playbooks/roles/pkg_update/tasks/Ubuntu.yml
@@ -15,6 +15,19 @@
   ansible.builtin.apt:
     name: "*"
     state: latest
+  # https://github.com/ansible/ansible/issues/51663
+  # There has been an intermittent issue with this task where it would fail and print the error:
+  #
+  #   Unable to acquire the dpkg frontend lock (/var/lib/dpkg/lock-frontend), is another process
+  #   using it?
+  #
+  # The reason for this is unclear. It's not from unattended-upgrades as that has already been
+  # uninstalled when creating the base image. The workaround for now is to simply retry this task
+  # several times in the event that it fails, with a small delay between each attempt.
+  register: result
+  until: result is not failed
+  retries: 5
+  delay: 15
 
 - name: Check if reboot is required
   stat:
diff --git a/playbooks/roles/slurm/tasks/main.yml b/playbooks/roles/slurm/tasks/main.yml
index a6b9df7e..6c50d150 100644
--- a/playbooks/roles/slurm/tasks/main.yml
+++ b/playbooks/roles/slurm/tasks/main.yml
@@ -14,3 +14,11 @@
 
 - import_tasks: '{{slurm_role}}.yml'
   become: true
+
+- name: Update Packages
+  include_role:
+    name: pkg_update
+    apply:
+      become: true
+  vars:
+    packages_to_exclude_from_upgrade: ['jetpack8']
diff --git a/playbooks/scheduler.yml b/playbooks/scheduler.yml
index 62f58f28..72d8e99e 100644
--- a/playbooks/scheduler.yml
+++ b/playbooks/scheduler.yml
@@ -44,10 +44,3 @@
         cc_webserverpath: '{{cyclecloud.web_server_path | default("")}}'
       when: ( queue_manager is defined and queue_manager == "slurm" )
 
-    - name: Update Packages
-      include_role:
-        name: pkg_update
-        apply:
-          become: true
-      vars:
-        packages_to_exclude_from_upgrade: "['jetpack8']"