From db212c94b2a4e570f5897bd2e5a302dffaac440e Mon Sep 17 00:00:00 2001 From: jamesbeedy Date: Mon, 25 Nov 2024 22:36:27 +0000 Subject: [PATCH] chore(slurmctld): charm maintained cgroup config These changes move the cgroup config to the charm constants.py. --- charms/slurmctld/charmcraft.yaml | 6 +----- charms/slurmctld/src/charm.py | 28 +++++++++++++++++++++++----- charms/slurmctld/src/constants.py | 7 +++++++ 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/charms/slurmctld/charmcraft.yaml b/charms/slurmctld/charmcraft.yaml index ecfaaf5..edf5562 100644 --- a/charms/slurmctld/charmcraft.yaml +++ b/charms/slurmctld/charmcraft.yaml @@ -96,11 +96,7 @@ config: cgroup-parameters: type: string - default: | - ConstrainCores=yes - ConstrainDevices=yes - ConstrainRAMSpace=yes - ConstrainSwapSpace=yes + default: "" description: | User supplied configuration for `cgroup.conf`. diff --git a/charms/slurmctld/src/charm.py b/charms/slurmctld/src/charm.py index 59b51cb..b1d030b 100755 --- a/charms/slurmctld/src/charm.py +++ b/charms/slurmctld/src/charm.py @@ -9,7 +9,11 @@ import subprocess from typing import Any, Dict, List, Optional, Union -from constants import CHARM_MAINTAINED_SLURM_CONF_PARAMETERS, PEER_RELATION +from constants import ( + CHARM_MAINTAINED_CGROUP_CONF_PARAMETERS, + CHARM_MAINTAINED_SLURM_CONF_PARAMETERS, + PEER_RELATION, +) from exceptions import IngressAddressUnavailableError from interface_slurmd import ( PartitionAvailableEvent, @@ -32,7 +36,7 @@ WaitingStatus, main, ) -from slurmutils.models import CgroupConfig, SlurmConfig +from slurmutils.models import SlurmConfig from charms.grafana_agent.v0.cos_agent import COSAgentProvider from charms.hpc_libs.v0.is_container import is_container @@ -235,10 +239,13 @@ def _on_write_slurm_conf( self._slurmctld.service.disable() self._slurmctld.config.dump(slurm_config) - # Write out any user_supplied_cgroup_parameters to /etc/slurm/cgroup.conf. - if user_supplied_cgroup_parameters := self.config.get("cgroup-parameters", ""): + # Write out any cgroup parameters to /etc/slurm/cgroup.conf. + if not is_container(): self._slurmctld.cgroup.dump( - CgroupConfig.from_str(str(user_supplied_cgroup_parameters)) + { + **CHARM_MAINTAINED_CGROUP_CONF_PARAMETERS, + **self._get_user_supplied_cgroup_parameters(), + } ) self._slurmctld.service.enable() @@ -329,6 +336,17 @@ def _get_user_supplied_parameters(self) -> Dict[Any, Any]: } return user_supplied_parameters + def _get_user_supplied_cgroup_parameters(self) -> Dict[str, str]: + """Gather, parse, and return the user supplied cgroup parameters.""" + user_supplied_cgroup_parameters = {} + if custom_cgroup_config := self.config.get("cgroup-parameters"): + user_supplied_cgroup_parameters = { + line.split("=")[0]: line.split("=", 1)[1] + for line in str(custom_cgroup_config).split("\n") + if not line.startswith("#") and line.strip() != "" + } + return user_supplied_cgroup_parameters + def _get_new_node_names_from_slurm_config( self, slurm_config: SlurmConfig ) -> List[Optional[str]]: diff --git a/charms/slurmctld/src/constants.py b/charms/slurmctld/src/constants.py index b0b32b5..f5ceeef 100644 --- a/charms/slurmctld/src/constants.py +++ b/charms/slurmctld/src/constants.py @@ -5,6 +5,13 @@ PEER_RELATION = "slurmctld-peer" +CHARM_MAINTAINED_CGROUP_CONF_PARAMETERS = { + "ConstrainCores": "yes", + "ConstrainDevices": "yes", + "ConstrainRAMSpace": "yes", + "ConstrainSwapSpace": "yes", +} + CHARM_MAINTAINED_SLURM_CONF_PARAMETERS = { "AuthAltParameters": {"jwt_key": "/var/lib/slurm/checkpoint/jwt_hs256.key"}, "AuthAltTypes": ["auth/jwt"],