Skip to content

Commit

Permalink
login node definition for Slurm clusters (#1849)
Browse files Browse the repository at this point in the history
* add login nodes

* exclude login from compute nodes
  • Loading branch information
xpillons authored Feb 21, 2024
1 parent 1b16be1 commit 8a57d4c
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 1 deletion.
7 changes: 6 additions & 1 deletion config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,10 @@
"description": "Maximum number of nodes that can be allocated for the entire queue.",
"type": "integer"
},
"initial_count": {
"description": "Initial number of nodes allocated.",
"type": "integer"
},
"image": {
"description": "Can be either an image reference or an image_id from the image registry or a custom managed image",
"type": "string"
Expand Down Expand Up @@ -1142,7 +1146,8 @@
"description": "Type of queue - mandatory for remoteviz nodes",
"enum": [
"remoteviz",
"compute"
"compute",
"login"
],
"type": "string"
},
Expand Down
9 changes: 9 additions & 0 deletions config.tpl.yml
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,15 @@ queues:
EnableAcceleratedNetworking: true
max_hours: 12
min_hours: 1
# Login nodes
# A queue with type "login" is rendered as its own login nodearray by the Slurm
# cluster template and is left out of the compute nodearrays.
- name: login
type: login
vm_size: Standard_F2s_v2
# Initial number of nodes allocated; equal to max_count here, so this entry
# describes a single login node.
initial_count: 1
max_count: 1
image: azhpc:azhop-compute:centos-7_9:latest
ColocateNodes: false
EnableAcceleratedNetworking: true

# Application settings
applications:
Expand Down
44 changes: 44 additions & 0 deletions playbooks/roles/cyclecloud_cluster/templates/azhop-slurm.txt.j2
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,48 @@ echo "cloud-init done" >> /tmp/cloud-init.txt
address = {{mounts.home.server}}
options = {{ mounts.home.options | default("rw,hard,rsize=262144,wsize=262144,vers=3,tcp,_netdev",true) }}

{# Login nodearrays: emit one standalone nodearray per queue whose type is
   exactly 'login'. Queues without a type are dropped first so the attribute
   test never sees an undefined value. The exact 'equalto' test (rather than
   the prefix-anchored regex 'match') keeps this selection the strict
   complement of the compute loop, which skips queues with type == 'login';
   a type such as 'login-gpu' is therefore not rendered twice. #}
{% for login in ( cc_queues | rejectattr('type', 'undefined') | selectattr('type', 'equalto', 'login')) %}
[[nodearray {{ login.name }}]]
{# initial_count is required for login queues: rendering fails if it is missing. #}
InitialCount = {{ login.initial_count }}
MachineType = {{ login.vm_size }}
{# Optional sizing/network/spot attributes are only emitted when set on the queue. #}
{% if login.max_core_count is defined %}
MaxCoreCount = {{ login.max_core_count }}
{% endif %}
{% if login.max_count is defined %}
MaxCount = {{ login.max_count }}
{% endif %}
{% if login.EnableAcceleratedNetworking is defined %}
EnableAcceleratedNetworking = {{ login.EnableAcceleratedNetworking }}
{% endif %}
{% if login.spot is defined %}
Interruptible = {{login.spot}}
{% endif %}
Azure.MaxScaleSetSize = {{ login.MaxScaleSetSize | default(100) }}
EphemeralOSDisk = {{ login.EphemeralOSDisk | default(false) }}
# Lookup image version for that queue
{# Prefer the entry in cc_image_lookup keyed by queue name; otherwise use the
   image configured on the queue itself. #}
{% if cc_image_lookup is iterable and login.name in cc_image_lookup %}
ImageName = {{ cc_image_lookup[login.name] }}
{% else %}
ImageName = {{ login.image }}
{% endif %}
{# plan is split on ':' into publisher, product and name, in that order. #}
{% if login.plan is defined %}
{% set plan_details = login.plan.split(':') %}
ImagePlan.Publisher = {{ plan_details[0] }}
ImagePlan.Product = {{ plan_details[1] }}
ImagePlan.Name = {{ plan_details[2] }}
{% endif %}
{# NOTE(review): SubnetId is gated on 'location' only, so a queue that sets
   location is expected to set subnet as well - confirm against callers. #}
{% if login.location is defined %}
SubnetId = {{ login.subnet }}
Region = {{ login.location }}
{% endif %}

[[[cluster-init cyclecloud/slurm:login:{{cyclecloud_slurm_release}}]]]
[[[configuration]]]
autoscale.enabled = false
slurm.use_nodename_as_hostname = true
slurm.node_prefix = ${StrJoin("-", ClusterName, "")}
{% endfor %}

[[node nodearraybase]]
Abstract = true
[[[configuration]]]
Expand All @@ -107,6 +149,7 @@ echo "cloud-init done" >> /tmp/cloud-init.txt
[[[cluster-init cyclecloud/slurm:execute:{{cyclecloud_slurm_release}}]]]

{% for queue in cc_queues %}
{% if (queue.type is undefined) or (queue.type is defined and queue.type != 'login') %}
[[nodearray {{ queue.name }}]]
Extends = nodearraybase
MachineType = {{ queue.vm_size }}
Expand Down Expand Up @@ -157,4 +200,5 @@ echo "cloud-init done" >> /tmp/cloud-init.txt
cyclecloud.cluster.autoscale.idle_time_before_jobs = {{queue.idle_timeout}}
{% endif %}
[[[cluster-init enroot:default:1.0.0]]]
{% endif %}
{% endfor %}

0 comments on commit 8a57d4c

Please sign in to comment.