From 91aaf79d159403d452a91813dea571d88f55011b Mon Sep 17 00:00:00 2001
From: Christian Tacke <58549698+ChristianTackeGSI@users.noreply.github.com>
Date: Mon, 16 Aug 2021 18:27:01 +0200
Subject: [PATCH] ci(slurm): Update to Heterogeneous Cluster

Running FairRoot jobs in parallel on one machine
does not work currently. So allocate a whole node.

Update tooling accordingly.
---
Reviewer note (ignored by git-am): the line structure of this patch was
restored by hand from a whitespace-collapsed copy. Line counts were checked
against the hunk headers and the diffstat below, but blank-line placement
and leading indentation inside the hunks are inferred. Verify against the
upstream commit before applying, or use `git apply --ignore-whitespace`.

 FairRoot_build_test.cmake |  4 +++-
 slurm-submit.sh           | 26 +++++++++++++++++++++-----
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/FairRoot_build_test.cmake b/FairRoot_build_test.cmake
index e86ea58da8..bb6035adb3 100644
--- a/FairRoot_build_test.cmake
+++ b/FairRoot_build_test.cmake
@@ -16,8 +16,10 @@ set(CTEST_USE_LAUNCHERS ON)
 set(CTEST_CONFIGURATION_TYPE "RelWithDebInfo")
 
 if(NOT NCPUS)
-  if(ENV{SLURM_CPUS_PER_TASK})
+  if(DEFINED ENV{SLURM_CPUS_PER_TASK})
     set(NCPUS $ENV{SLURM_CPUS_PER_TASK})
+  elseif(DEFINED ENV{SLURM_JOB_CPUS_PER_NODE})
+    set(NCPUS $ENV{SLURM_JOB_CPUS_PER_NODE})
   else()
     include(ProcessorCount)
     ProcessorCount(NCPUS)
diff --git a/slurm-submit.sh b/slurm-submit.sh
index 87601b3fc1..0b3f26017d 100755
--- a/slurm-submit.sh
+++ b/slurm-submit.sh
@@ -1,15 +1,22 @@
 #! /bin/bash
 
+if [ $# != 2 ]
+then
+	echo "*** Please call like: $0 LABEL JOBSH"
+	exit 1
+fi
+
 label="$1"
 jobsh="$2"
 
 if [ -z "$ALFACI_SLURM_CPUS" ]
 then
-	ALFACI_SLURM_CPUS=32
+	# ALFACI_SLURM_CPUS=20
+	:
 fi
 if [ -z "$ALFACI_SLURM_EXTRA_OPTS" ]
 then
-	ALFACI_SLURM_EXTRA_OPTS="--hint=compute_bound"
+	ALFACI_SLURM_EXTRA_OPTS="--exclusive --cpu-bind=no"
 fi
 if [ -z "$ALFACI_SLURM_TIMEOUT" ]
 then
@@ -23,15 +30,24 @@ fi
 echo "*** Slurm request options :"
 echo "*** Working directory ..: $PWD"
 echo "*** Queue ..............: $ALFACI_SLURM_QUEUE"
-echo "*** CPUs ...............: $ALFACI_SLURM_CPUS"
+if [ -n "$ALFACI_SLURM_CPUS" ]
+then
+	echo "*** CPUs ...............: $ALFACI_SLURM_CPUS"
+fi
 echo "*** Wall Time ..........: $ALFACI_SLURM_TIMEOUT min"
 echo "*** Job Name ...........: ${label}"
 echo "*** Extra Options ......: ${ALFACI_SLURM_EXTRA_OPTS}"
+
+srun_cmdline_opts="-p $ALFACI_SLURM_QUEUE -n 1 -N 1 -t $ALFACI_SLURM_TIMEOUT"
+if [ -n "$ALFACI_SLURM_CPUS" ]
+then
+	srun_cmdline_opts="$srun_cmdline_opts -c $ALFACI_SLURM_CPUS"
+fi
+
 echo "*** Submitting job at ....: $(date -R)"
 (
 	set -x
-	srun -p $ALFACI_SLURM_QUEUE -c $ALFACI_SLURM_CPUS -n 1 \
-		-t $ALFACI_SLURM_TIMEOUT \
+	srun $srun_cmdline_opts \
 		--job-name="${label}" \
 		${ALFACI_SLURM_EXTRA_OPTS} \
 		bash "${jobsh}"