Skip to content

Commit

Permalink
Merge branch 'main' into refactor/firewalld
Browse files Browse the repository at this point in the history
  • Loading branch information
sjpb authored Mar 7, 2024
2 parents bab676f + 6ca97fe commit 6231b20
Show file tree
Hide file tree
Showing 10 changed files with 20 additions and 4 deletions.
1 change: 1 addition & 0 deletions ansible/roles/filebeat/tasks/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
- name: Reload filebeat unit file
command: systemctl daemon-reload
when: _filebeat_unit.changed
become: true
1 change: 1 addition & 0 deletions ansible/roles/hpctests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Role Variables
- `hpctests_hpl_arch`: Optional, default 'linux64'. Arbitrary architecture name for HPL build. HPL is compiled on the first compute node of those selected (see `hpctests_nodes`), so this can be used to create different builds for different types of compute node.

The following variables should not generally be changed:
- `hpctests_pre_cmd`: Optional. Command(s) to include in sbatch templates before module load commands.
- `hpctests_pingmatrix_modules`: Optional. List of modules to load for pingmatrix test. Defaults are suitable for OpenHPC 2.x cluster using the required packages.
- `hpctests_pingpong_modules`: As above but for pingpong test.
- `hpctests_pingpong_plot`: Whether to plot pingpong results. Default `yes`.
Expand Down
1 change: 1 addition & 0 deletions ansible/roles/hpctests/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
hpctests_rootdir:
hpctests_pre_cmd: ''
hpctests_pingmatrix_modules: [gnu12 openmpi4]
hpctests_pingpong_modules: [gnu12 openmpi4 imb]
hpctests_pingpong_plot: yes
Expand Down
2 changes: 1 addition & 1 deletion ansible/roles/hpctests/templates/hpl-build.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
{%if hpctests_nodes is defined %}#SBATCH --nodelist={{ hpctests_computes.stdout_lines[0] }}{% endif %}

echo HPL arch: {{ hpctests_hpl_arch }}

{{ hpctests_pre_cmd }}
module load {{ hpctests_hpl_modules | join(' ' ) }}
make arch={{ hpctests_hpl_arch }} clean_arch_all
make arch={{ hpctests_hpl_arch }}
2 changes: 1 addition & 1 deletion ansible/roles/hpctests/templates/hpl-solo.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ echo SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST
echo SLURM_JOB_ID: $SLURM_JOB_ID
echo UCX_NET_DEVICES: $UCX_NET_DEVICES
echo HPL arch: {{ hpctests_hpl_arch }}

{{ hpctests_pre_cmd }}
module load {{ hpctests_hpl_modules | join(' ' ) }}
mpirun ./xhpl-{{ hpctests_hpl_arch }}
1 change: 1 addition & 0 deletions ansible/roles/hpctests/templates/pingmatrix.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export UCX_NET_DEVICES={{ hpctests_ucx_net_devices }}
echo SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST
echo SLURM_JOB_ID: $SLURM_JOB_ID
echo UCX_NET_DEVICES: $UCX_NET_DEVICES
{{ hpctests_pre_cmd }}
module load {{ hpctests_pingmatrix_modules | join(' ' ) }}

mpicc -o nxnlatbw mpi_nxnlatbw.c
Expand Down
1 change: 1 addition & 0 deletions ansible/roles/hpctests/templates/pingpong.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ export UCX_NET_DEVICES={{ hpctests_ucx_net_devices }}
echo SLURM_JOB_NODELIST: $SLURM_JOB_NODELIST
echo SLURM_JOB_ID: $SLURM_JOB_ID
echo UCX_NET_DEVICES: $UCX_NET_DEVICES
{{ hpctests_pre_cmd }}
module load {{ hpctests_pingpong_modules | join(' ' ) }}

#srun --mpi=pmi2 IMB-MPI1 pingpong # doesn't work in ohpc v2.1
Expand Down
11 changes: 11 additions & 0 deletions environments/.stackhpc/LEAFCLOUD.pkrvars.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Packer variable values for the LEAFCLOUD CI environment.
# NOTE(review): presumably passed to Packer as a var-file alongside the image-build template - confirm.
flavor = "en1.xsmall"
use_blockstorage_volume = true
volume_size = 12 # GB. Compatible with SMS-lab's general.v1.tiny
image_disk_format = "qcow2"
# The value looks like a network ID (UUID); the trailing note appears to give the network's name.
networks = ["909e49e8-6911-473a-bf88-0495ca63853c"] # slurmapp-ci
# The trailing link is the pull request referenced for this source image.
source_image_name = "openhpc-230804-1754-80b8d714" # https://github.com/stackhpc/ansible-slurm-appliance/pull/298
# Presumably the base OS image used for "fat image" builds - TODO confirm against the Packer template.
fatimage_source_image_name = "Rocky-8-GenericCloud-Base-8.9-20231119.0.x86_64.qcow2"
ssh_keypair_name = "slurm-app-ci"
ssh_private_key_file = "~/.ssh/id_ed25519"
security_groups = ["default", "SSH"]
floating_ip_network = "external"
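2 changes: 1 addition & 1 deletion (file path missing from this capture; the hunk below is from the file that defines bastion_config)
Original file line number Diff line number Diff line change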
Expand Up @@ -4,7 +4,7 @@ bastion_config:
user: slurm-app-ci
ip: 128.232.222.183
LEAFCLOUD:
-user: rocky
+user: slurm-app-ci
ip: 195.114.30.222
# NB: The bastion_{user,ip} variables are used directly in the CI workflow too
bastion_user: "{{ bastion_config[ci_cloud].user }}"
Expand Down
2 changes: 1 addition & 1 deletion environments/.stackhpc/terraform/LEAFCLOUD.tfvars
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cluster_net = "slurmapp-ci"
cluster_subnet = "slurmapp-ci"
control_node_flavor = "ec1.medium" # small ran out of memory, medium gets down to ~100Mi mem free on deployment
-other_node_flavor = "ec1.small"
+other_node_flavor = "en1.xsmall"
state_volume_type = "unencrypted"
home_volume_type = "unencrypted"

0 comments on commit 6231b20

Please sign in to comment.