Make the SLURM configuration data-driven and the slurm-drmaa version selectable.

Summary:
  * README.md, defaults/main.yml: add drmaa_lib_version,
    slurm_default_conf_options and slurm_conf_options.
  * tasks/drmaa.yml: derive FILENAME from drmaa_lib_version; version 1.0.7 is
    downloaded from apps.man.poznan.pl, every other version from the
    natefoo/slurm-drmaa GitHub releases.
  * tasks/main.yml: add CheckpointType=checkpoint/blcr when "blcr" is in
    templates, then merge slurm_conf_options over slurm_default_conf_options.
    The BLCR step combines onto slurm_conf_options (not onto the defaults) so
    that user-supplied options are not discarded before the merge.
  * templates/slurm.conf.j2: replace the hand-written option list with a loop
    that emits one key=value line per entry of slurm_conf_options.
  * tests/test.yml: enable the DRMAA install (version 1.1.4) and set
    slurm_version.

Review notes:
  * NOTE(review): SlurmdPidFile and SlurmctldPidFile both point at
    /var/run/slurmctld.pid. The value is carried over unchanged from the
    removed template line; confirm that sharing the path is intended.
  * NOTE(review): the removed template set CacheGroups=0, SchedulerPort=7321
    and AccountingStoreJobComment=YES; none of them appear in
    slurm_default_conf_options. Presumably intentional - confirm.
  * NOTE(review): the merged result is stored with set_fact under the same
    name as the user-facing variable. If slurm_conf_options is supplied with a
    higher precedence than set_fact (role parameters, extra vars) the template
    may see only the user's options - TODO confirm against how the role is
    invoked.
  * Indentation and blank context lines were re-derived from the hunk line
    counts; run "git apply --check" before applying. The index lines are kept
    from the first revision of this patch and may not match the amended
    post-images.

diff --git a/README.md b/README.md
index 7490e4e..1d98e12 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,46 @@ The variables that can be passed to this role and a brief description about them
       user: user1
     # Install DRMAA library
     drmaa_lib_install: false
+    drmaa_lib_version: "1.0.7"
+    # SLURM default configuration options
+    slurm_default_conf_options:
+      AuthType: auth/munge
+      CryptoType: crypto/munge
+      FirstJobId: 1
+      JobRequeue: 0
+      JobSubmitPlugins: all_partitions
+      ProctrackType: proctrack/pgid
+      ReturnToService: 2
+      SlurmctldPidFile: /var/run/slurmctld.pid
+      SlurmctldPort: 6817
+      SlurmdPidFile: /var/run/slurmctld.pid
+      SlurmdPort: 6818
+      SlurmdSpoolDir: /var/spool/slurm
+      SlurmUser: slurm
+      StateSaveLocation: /var/slurm/checkpoint
+      SwitchType: switch/none
+      TaskPlugin: task/none
+      InactiveLimit: 0
+      KillWait: 30
+      MessageTimeout: 30
+      MinJobAge: 300
+      SlurmctldTimeout: 30
+      SlurmdTimeout: 40
+      Waittime: 0
+      FastSchedule: 1
+      SchedulerType: sched/backfill
+      SelectType: select/linear
+      AccountingStorageType: accounting_storage/none
+      ClusterName: cluster
+      JobCompType: jobcomp/none
+      JobAcctGatherFrequency: 30
+      JobAcctGatherType: jobacct_gather/none
+      SlurmctldDebug: debug5
+      SlurmctldLogFile: /var/log/slurm/slurmctld.log
+      SlurmdDebug: debug5
+      SlurmdLogFile: /var/log/slurm/slurmd.log
+    # SLURM user configuration options
+    slurm_conf_options: {}
 
 Example Playbook
 ----------------
diff --git a/defaults/main.yml b/defaults/main.yml
index e539e2a..3bf7cb3 100644
--- a/defaults/main.yml
+++ b/defaults/main.yml
@@ -34,3 +34,43 @@ max_number_of_nodes: 3
 slurm_vnode_prefix: wn
 # Install DRMAA library
 drmaa_lib_install: false
+drmaa_lib_version: "1.0.7"
+# SLURM default configuration options
+slurm_default_conf_options:
+  AuthType: auth/munge
+  CryptoType: crypto/munge
+  FirstJobId: 1
+  JobRequeue: 0
+  JobSubmitPlugins: all_partitions
+  ProctrackType: proctrack/pgid
+  ReturnToService: 2
+  SlurmctldPidFile: /var/run/slurmctld.pid
+  SlurmctldPort: 6817
+  SlurmdPidFile: /var/run/slurmctld.pid
+  SlurmdPort: 6818
+  SlurmdSpoolDir: /var/spool/slurm
+  SlurmUser: slurm
+  StateSaveLocation: /var/slurm/checkpoint
+  SwitchType: switch/none
+  TaskPlugin: task/none
+  InactiveLimit: 0
+  KillWait: 30
+  MessageTimeout: 30
+  MinJobAge: 300
+  SlurmctldTimeout: 30
+  SlurmdTimeout: 40
+  Waittime: 0
+  FastSchedule: 1
+  SchedulerType: sched/backfill
+  SelectType: select/linear
+  AccountingStorageType: accounting_storage/none
+  ClusterName: cluster
+  JobCompType: jobcomp/none
+  JobAcctGatherFrequency: 30
+  JobAcctGatherType: jobacct_gather/none
+  SlurmctldDebug: debug5
+  SlurmctldLogFile: /var/log/slurm/slurmctld.log
+  SlurmdDebug: debug5
+  SlurmdLogFile: /var/log/slurm/slurmd.log
+# SLURM user configuration options
+slurm_conf_options: {}
diff --git a/tasks/drmaa.yml b/tasks/drmaa.yml
index 39abe49..9336584 100644
--- a/tasks/drmaa.yml
+++ b/tasks/drmaa.yml
@@ -1,10 +1,5 @@
 ---
 # Install DRMAA Library for slurm
-  - set_fact:
-      INSTALL_PATH: /opt
-      ID_FILE: "9"
-      FILENAME: "slurm-drmaa-1.0.7"
-
   - name: Check if lib exists
     stat: path=/usr/local/lib/libdrmaa.so
     register: libstat
@@ -14,10 +9,21 @@
   - name: Install GCC
     package: name=gcc state=present
 
+  - set_fact:
+      INSTALL_PATH: /opt
+      FILENAME: "slurm-drmaa-{{ drmaa_lib_version }}"
+
   - name: Download lib
     get_url:
       url: http://apps.man.poznan.pl/trac/slurm-drmaa/downloads/9
       dest: "{{ INSTALL_PATH }}/{{ FILENAME }}.tgz"
+    when: drmaa_lib_version == "1.0.7"
+
+  - name: Download lib from GitHub releases
+    get_url:
+      url: "https://github.com/natefoo/slurm-drmaa/releases/download/{{ drmaa_lib_version }}/slurm-drmaa-{{ drmaa_lib_version }}.tar.gz"
+      dest: "{{ INSTALL_PATH }}/{{ FILENAME }}.tgz"
+    when: drmaa_lib_version != "1.0.7"
 
   - name: unarchive lib
     unarchive:
diff --git a/tasks/main.yml b/tasks/main.yml
index 8ec2744..609a81b 100644
--- a/tasks/main.yml
+++ b/tasks/main.yml
@@ -58,6 +58,15 @@
       - /var/slurm/checkpoint
       - /etc/slurm
 
+  - name: Use BLCR as checkpointing type
+    set_fact:
+      slurm_conf_options: "{{ slurm_conf_options | combine({'CheckpointType': 'checkpoint/blcr'}) }}"
+    when: '"blcr" in templates'
+
+  - name: Update default options with user options
+    set_fact:
+      slurm_conf_options: "{{ slurm_default_conf_options | combine(slurm_conf_options) }}"
+
   - name: Include "{{slurm_type_of_node}}" SLURM recipe
     include_tasks: "{{slurm_type_of_node}}.yaml"
 
diff --git a/templates/slurm.conf.j2 b/templates/slurm.conf.j2
index 7e173c4..a808ec9 100644
--- a/templates/slurm.conf.j2
+++ b/templates/slurm.conf.j2
@@ -3,160 +3,11 @@
 # See the slurm.conf man page for more information.
 #
 ControlMachine={{slurm_server_name}}
-#ControlAddr=
-#BackupController=
-#BackupAddr=
-#
-AuthType=auth/munge
-CacheGroups=0
 
-{% if 'blcr' in templates %}
-CheckpointType=checkpoint/blcr
-{% endif %}
+{% for key, value in slurm_conf_options.items() %}
+{{key}}={{value}}
+{% endfor %}
 
-CryptoType=crypto/munge
-#DisableRootJobs=NO
-#EnforcePartLimits=NO
-#Epilog=/usr/local/slurm/epilog_controller
-#EpilogSlurmctld=/usr/local/slurm/epilog_controller
-FirstJobId=1
-#MaxJobId=999999
-#GresTypes=
-#GroupUpdateForce=0
-#GroupUpdateTime=600
-#JobCheckpointDir=/var/slurm/checkpoint
-#JobCredentialPrivateKey=
-#JobCredentialPublicCertificate=
-#JobFileAppend=0
-JobRequeue=0
-JobSubmitPlugins=all_partitions
-#KillOnBadExit=0
-#Licenses=foo*4,bar
-#MailProg=/bin/mail
-#MaxJobCount=5000
-#MaxStepCount=40000
-#MaxTasksPerNode=128
-#MpiDefault=openmpi
-#MpiParams=ports=12000-12999
-#PluginDir=
-#PlugStackConfig=
-#PrivateData=jobs
-ProctrackType=proctrack/pgid
-#Prolog=
-#PrologSlurmctld=
-#PropagatePrioProcess=0
-#PropagateResourceLimits=
-#PropagateResourceLimitsExcept=
-ReturnToService=2
-#SallocDefaultCommand=
-SlurmctldPidFile=/var/run/slurmctld.pid
-SlurmctldPort=6817
-SlurmdPidFile=/var/run/slurmctld.pid
-SlurmdPort=6818
-SlurmdSpoolDir=/var/spool/slurm
-SlurmUser=slurm
-#SlurmdUser=root
-#SrunEpilog=
-#SrunProlog=
-StateSaveLocation=/var/slurm/checkpoint
-SwitchType=switch/none
-#TaskEpilog=
-TaskPlugin=task/none
-#TaskPluginParam=
-#TaskProlog=
-#TopologyPlugin=topology/tree
-#TmpFs=/tmp
-#TrackWCKey=no
-#TreeWidth=
-#UnkillableStepProgram=
-#UsePAM=0
-#
-#
-# TIMERS
-#BatchStartTimeout=10
-#CompleteWait=0
-#EpilogMsgTime=2000
-#GetEnvTimeout=2
-#HealthCheckInterval=0
-#HealthCheckProgram=
-InactiveLimit=0
-KillWait=30
-MessageTimeout=30
-#ResvOverRun=0
-MinJobAge=300
-#OverTimeLimit=0
-SlurmctldTimeout=30
-SlurmdTimeout=40
-#SlurmctldTimeout=120
-#SlurmdTimeout=300
-#UnkillableStepTimeout=60
-#VSizeFactor=0
-Waittime=0
-#
-#
-# SCHEDULING
-#DefMemPerCPU=0
-FastSchedule=1
-#MaxMemPerCPU=0
-#SchedulerRootFilter=1
-#SchedulerTimeSlice=30
-SchedulerType=sched/backfill
-SchedulerPort=7321
-SelectType=select/linear
-#SelectTypeParameters=
-#
-#
-# JOB PRIORITY
-#PriorityType=priority/basic
-#PriorityDecayHalfLife=
-#PriorityCalcPeriod=
-#PriorityFavorSmall=
-#PriorityMaxAge=
-#PriorityUsageResetPeriod=
-#PriorityWeightAge=
-#PriorityWeightFairshare=
-#PriorityWeightJobSize=
-#PriorityWeightPartition=
-#PriorityWeightQOS=
-#
-#
-# LOGGING AND ACCOUNTING
-#AccountingStorageEnforce=0
-#AccountingStorageHost=
-#AccountingStorageLoc=
-#AccountingStoragePass=
-#AccountingStoragePort=
-AccountingStorageType=accounting_storage/none
-#AccountingStorageUser=
-AccountingStoreJobComment=YES
-ClusterName=cluster
-#DebugFlags=
-#JobCompHost=
-#JobCompLoc=
-#JobCompPass=
-#JobCompPort=
-JobCompType=jobcomp/none
-#JobCompUser=
-JobAcctGatherFrequency=30
-JobAcctGatherType=jobacct_gather/none
-SlurmctldDebug=debug5
-SlurmctldLogFile=/var/log/slurm/slurmctld.log
-SlurmdDebug=debug5
-SlurmdLogFile=/var/log/slurm/slurmd.log
-#SlurmSchedLogFile=
-#SlurmSchedLogLevel=
-#
-#
-# POWER SAVE SUPPORT FOR IDLE NODES (optional)
-#SuspendProgram=
-#ResumeProgram=
-#SuspendTimeout=
-#ResumeTimeout=
-#ResumeRate=
-#SuspendExcNodes=
-#SuspendExcParts=
-#SuspendRate=
-#SuspendTime=
 #
 #
 # COMPUTE NODES
diff --git a/tests/test.yml b/tests/test.yml
index 2533bde..a9fae57 100644
--- a/tests/test.yml
+++ b/tests/test.yml
@@ -9,3 +9,6 @@
     slurm_wn_ips: ["127.0.0.1"]
     slurm_vnode_prefix: vnode-
     max_number_of_nodes: 1
+    drmaa_lib_install: true
+    drmaa_lib_version: "1.1.4"
+    slurm_version: "21.08.8"