diff --git a/README.md b/README.md index b5276d0..2bc6794 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,6 @@ ansible-role-slurm Tested with these Linux distributions: - CentOS 7 - - 17.02.x (travis ci automatic testing) - - 17.11.x (travis ci automatic testing) - Ubuntu - 18.04 (client only) @@ -47,13 +45,6 @@ It is possible to run the slurmdbd on a different host than the slurmctld by cha It is also possible to setup a backup slurm controller by defining slurm_backup_controller variable. Please read the [SLURM HA documentation](https://slurm.schedmd.com/quickstart_admin.html#HA). For example you'll need a shared directory (for example NFS) available on both the slurm_service_node and slurm_backup_controller. -Specific versions of SLURM can be gotten from the FGCI yum repo by setting: -
-fgci_slurmrepo_version: "fgcislurm1711" -- -We have 1702 and 1711 RPMs there. - ### Implementation A playbook that uses this role: https://github.com/fgci-org/fgci-ansible @@ -78,28 +69,16 @@ Example Playbook ### Known Issues - - This role used to be able to build slurm rpms, distribute them and install them. The last tag/release that had this feature was v1.5.0 - Setting up a shared directory á la NFS for running a SLURM in HA is out of scope for this role. There are many [NFS server roles](https://github.com/CSCfi/ansible-role-nfs) and [Mount Filesystem roles](https://github.com/CSCfi/ansible-role-nfs_mount) roles out there. ### Testing and contributions -Testing is done with [Travis](.travis.yml). New SLURM release can be tested after the RPMs are built and available in the FGCI repo. After that one needs to add a new tests/test1702.yml and a new IMAGE_BUILD_PLATFORM env in .travis.yml. +Testing is done with [Travis](.travis.yml). - PRs to master - if possible make sure that the new feature is also tested - strive for backwards compatibility -**Adding testing of a new SLURM release** - -Using 17.11 as an example - - - Get CSC to build new rpms and put them in a new yum repo - - New branch in ansible-role-slurm with the following changes/additions: - - IMAGE_BUILD_PLATFORM=fgcislurm1711 in .travis.yml env: - - tests/test1711.yml with fgci_slurmrepo_version: "fgcislurm1711" - - tests/fgcislurm1711 directory symlink to tests/epel-centos7 - - Then make changes if needed to the role that does not break older SLURM versions - # Authors / Contributors: - Marco Passerini (original author) diff --git a/UPGRADE.md b/UPGRADE.md index c0454ed..ae0e92a 100644 --- a/UPGRADE.md +++ b/UPGRADE.md @@ -1,6 +1,10 @@ Switch from FGCI to OHPC slurm packages --------------------------------------- +NOTE 2021-09-30: +Changes have been made to the slurm role since this document was written. +It may no longer be relevant. By default, slurm now comes from OHPC. 
+ In general one needs to be careful with slurmdbd and run it in the foreground during upgrade to monitor progress. See http://slurm.schedmd.com/quickstart_admin.html#upgrade diff --git a/defaults/main.yml b/defaults/main.yml index 1184e34..e1ef73c 100644 --- a/defaults/main.yml +++ b/defaults/main.yml @@ -21,8 +21,7 @@ nis_server: False #slurm_user_uid: 5004 #slurm_user_gid: 5004 -fgci_slurmrepo_version: "fgcislurm1711" -slurm_repo: "fgci" # Or "ohpc" to use OHPC slurm packages +slurm_repo: "ohpc" # Use OHPC slurm packages (default) slurm_ohpc_versionlock: True siteName: "io" nodeBase: "{{ siteName }}" diff --git a/tasks/common_ubuntu.yml b/tasks/common_ubuntu.yml index 9df3164..be182fa 100644 --- a/tasks/common_ubuntu.yml +++ b/tasks/common_ubuntu.yml @@ -1,12 +1,4 @@ --- -# This sh/could be put in a separate role.. -# define where we get .debs for slurm. - - name: Add local apt-repo - template: src=apt.repo.j2 dest=/etc/apt/sources.list.d/fgislurm.list owner=root group=root mode=0644 backup=yes - when: slurm_repo == 'fgci' and slurm_apt_repo == True - - -## # Set slurm user and group locally on every host if uid/gid given - name: add slurm unix group group: name=slurm system=no state=present gid={{ slurm_user_gid|default(slurm_user_uid) }} diff --git a/tasks/version.yml b/tasks/version.yml index a1ca98a..cd21505 100644 --- a/tasks/version.yml +++ b/tasks/version.yml @@ -1,25 +1,2 @@ --- -#### Version - - - name: Get version of installed slurm RPM - shell: yum list installed slurm | grep slurm | awk '{print $2}' | cut -d'-' -f1 - register: reg_slurm_yum_version - check_mode: no - changed_when: False - - - name: Get version of installed slurm RPM major version - shell: yum list installed slurm | grep slurm | awk '{print $2}' | cut -d'-' -f1|cut -d "." 
-f1-2|sed -e 's/\.//' - register: reg_slurm_yum_version_major - check_mode: no - changed_when: False - - - name: Set fact with contents of fgci_slurmrepo_version with only the numbers - set_fact: slurm_fact_fgci_slurmrepo_version="{{ fgci_slurmrepo_version | replace('fgcislurm', '')}}" - - - name: print custom facts in verbose mode - debug: var=item verbosity=1 - with_items: - - "{{ reg_slurm_yum_version['stdout'] }}" - - "{{ slurm_fact_fgci_slurmrepo_version }}" - - "{{ reg_slurm_yum_version_major['stdout'] }}" diff --git a/tasks/version_ubuntu.yml b/tasks/version_ubuntu.yml index 22f5c49..cd21505 100644 --- a/tasks/version_ubuntu.yml +++ b/tasks/version_ubuntu.yml @@ -1,25 +1,2 @@ --- -#### Version - - - name: Get version of installed slurm DEB - shell: dpkg -l slurm|grep "^ii"|awk '{print $3}'|cut -d'-' -f1 - register: reg_slurm_yum_version - check_mode: no - changed_when: False - - - name: Get version of installed slurm DEB major version - shell: dpkg -l slurm|grep "^ii"|awk '{print $3}'|cut -d'-' -f1|cut -d "." 
-f1-2|sed -e 's/\.//' - register: reg_slurm_yum_version_major - check_mode: no - changed_when: False - - - name: Set fact with contents of fgci_slurmrepo_version with only the numbers - set_fact: slurm_fact_fgci_slurmrepo_version="{{ fgci_slurmrepo_version | replace('fgcislurm', '')}}" - - - name: print custom facts in verbose mode - debug: var=item verbosity=1 - with_items: - - "{{ reg_slurm_yum_version['stdout'] }}" - - "{{ slurm_fact_fgci_slurmrepo_version }}" - - "{{ reg_slurm_yum_version_major['stdout'] }}" diff --git a/templates/fgislurm.repo b/templates/fgislurm.repo deleted file mode 100644 index 74b86e9..0000000 --- a/templates/fgislurm.repo +++ /dev/null @@ -1,6 +0,0 @@ -[fgislurm] -name=fgislurm -baseurl=http://idris.fgi.csc.fi/fgci7/x86_64/{{ fgci_slurmrepo_version }} -enabled=1 -gpgcheck=1 -gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CSC-GRID-2 diff --git a/tests/fgcislurm1702 b/tests/fgcislurm1702 deleted file mode 120000 index 8fc1a97..0000000 --- a/tests/fgcislurm1702 +++ /dev/null @@ -1 +0,0 @@ -epel-centos7 \ No newline at end of file diff --git a/tests/fgcislurm1711 b/tests/fgcislurm1711 deleted file mode 120000 index 8fc1a97..0000000 --- a/tests/fgcislurm1711 +++ /dev/null @@ -1 +0,0 @@ -epel-centos7 \ No newline at end of file diff --git a/tests/test-in-docker-image.sh b/tests/test-in-docker-image.sh index 302f189..96c6b79 100755 --- a/tests/test-in-docker-image.sh +++ b/tests/test-in-docker-image.sh @@ -9,17 +9,9 @@ OS_TYPE=${1:-} OS_VERSION=${2:-} ANSIBLE_VERSION=${3:-} -# So if we get fgcislurm as the first bash argument to this script we -# change playbook to a slurm specific version. -# This means to test a new SLURM version we need to add a new playbook. 
-if [[ $OS_TYPE = *"fgcislurm"* ]]; then - ANSIBLE_VAR="" - SLURMVERSION=$(echo $OS_TYPE|tr -d 'fgcislurm') - ANSIBLE_PLAYBOOk="tests/test$SLURMVERSION.yml" -else - ANSIBLE_VAR="" - ANSIBLE_PLAYBOOk="tests/test.yml" -fi +ANSIBLE_VAR="" +ANSIBLE_PLAYBOOk="tests/test.yml" + ANSIBLE_INVENTORY="tests/inventory" #ANSIBLE_LOG_LEVEL="" ANSIBLE_LOG_LEVEL="-v" diff --git a/tests/test.yml b/tests/test.yml index 6206527..63fc7db 100644 --- a/tests/test.yml +++ b/tests/test.yml @@ -26,7 +26,6 @@ - { match: "{gpu[2-22]}", name: "check_hw_ib", arguments: "56" } - { match: "*", name: "check_hw_eth", arguments: "eth0" } - slurm_plugstack: True - - fgci_slurmrepo_version: "fgcislurm1711" - slurm_x11_spank: True - slurm_topology_plugin: "topology/tree" - slurm_topologylist: diff --git a/tests/test1702.yml b/tests/test1702.yml deleted file mode 100644 index b7a2611..0000000 --- a/tests/test1702.yml +++ /dev/null @@ -1,17 +0,0 @@ ---- - - - name: install a SLURM 1702 cluster - hosts: install,compute - remote_user: root - roles: - - ansible-role-pam - - ansible-role-nhc - - ansible-role-slurm - vars_files: - - testvars.yml - vars: - - fgci_slurmrepo_version: "fgcislurm1702" - - pre_tasks: - - debug: var=group_names - - package: name=rsyslog state=present diff --git a/tests/test1711.yml b/tests/test1711.yml deleted file mode 100644 index 11e1338..0000000 --- a/tests/test1711.yml +++ /dev/null @@ -1,17 +0,0 @@ ---- - - - name: install a SLURM 1711 cluster - hosts: install,compute - remote_user: root - roles: - - ansible-role-pam - - ansible-role-nhc - - ansible-role-slurm - vars_files: - - testvars.yml - vars: - - fgci_slurmrepo_version: "fgcislurm1711" - - pre_tasks: - - debug: var=group_names - - package: name=rsyslog state=present