Skip to content

Commit

Permalink
ci(slurm): Update to Heterogeneous Cluster
Browse files Browse the repository at this point in the history
Running FairRoot jobs in parallel on one machine does not
currently work, so allocate a whole node for each job.

Update tooling accordingly.
  • Loading branch information
ChristianTackeGSI authored and dennisklein committed Sep 1, 2021
1 parent 472f671 commit 91aaf79
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
4 changes: 3 additions & 1 deletion FairRoot_build_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ set(CTEST_USE_LAUNCHERS ON)
set(CTEST_CONFIGURATION_TYPE "RelWithDebInfo")

if(NOT NCPUS)
if(ENV{SLURM_CPUS_PER_TASK})
if(DEFINED ENV{SLURM_CPUS_PER_TASK})
set(NCPUS $ENV{SLURM_CPUS_PER_TASK})
elseif(DEFINED ENV{SLURM_JOB_CPUS_PER_NODE})
set(NCPUS $ENV{SLURM_JOB_CPUS_PER_NODE})
else()
include(ProcessorCount)
ProcessorCount(NCPUS)
Expand Down
26 changes: 21 additions & 5 deletions slurm-submit.sh
Original file line number Diff line number Diff line change
@@ -1,15 +1,22 @@
#! /bin/bash

if [ $# != 2 ]
then
echo "*** Please call like: $0 LABEL JOBSH"
exit 1
fi

label="$1"
jobsh="$2"

if [ -z "$ALFACI_SLURM_CPUS" ]
then
ALFACI_SLURM_CPUS=32
# ALFACI_SLURM_CPUS=20
:
fi
if [ -z "$ALFACI_SLURM_EXTRA_OPTS" ]
then
ALFACI_SLURM_EXTRA_OPTS="--hint=compute_bound"
ALFACI_SLURM_EXTRA_OPTS="--exclusive --cpu-bind=no"
fi
if [ -z "$ALFACI_SLURM_TIMEOUT" ]
then
Expand All @@ -23,15 +30,24 @@ fi
echo "*** Slurm request options :"
echo "*** Working directory ..: $PWD"
echo "*** Queue ..............: $ALFACI_SLURM_QUEUE"
echo "*** CPUs ...............: $ALFACI_SLURM_CPUS"
if [ -n "$ALFACI_SLURM_CPUS" ]
then
echo "*** CPUs ...............: $ALFACI_SLURM_CPUS"
fi
echo "*** Wall Time ..........: $ALFACI_SLURM_TIMEOUT min"
echo "*** Job Name ...........: ${label}"
echo "*** Extra Options ......: ${ALFACI_SLURM_EXTRA_OPTS}"

srun_cmdline_opts="-p $ALFACI_SLURM_QUEUE -n 1 -N 1 -t $ALFACI_SLURM_TIMEOUT"
if [ -n "$ALFACI_SLURM_CPUS" ]
then
srun_cmdline_opts="$srun_cmdline_opts -c $ALFACI_SLURM_CPUS"
fi

echo "*** Submitting job at ....: $(date -R)"
(
set -x
srun -p $ALFACI_SLURM_QUEUE -c $ALFACI_SLURM_CPUS -n 1 \
-t $ALFACI_SLURM_TIMEOUT \
srun $srun_cmdline_opts \
--job-name="${label}" \
${ALFACI_SLURM_EXTRA_OPTS} \
bash "${jobsh}"
Expand Down

0 comments on commit 91aaf79

Please sign in to comment.