From 941991987ca6668fa79494c2f7a72ad367508751 Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Wed, 18 Sep 2024 13:35:53 -0400 Subject: [PATCH 1/3] feat(slurm_ops): clean up public exports We now intend that you directly call the manager that you plan on using rather than compose it yourself within the specific Slurm charm. This commit makes the methods and classes that we want private have an underscore before their names so that it is more obvious that they aren't intended to be used publicly outside of the charm library. Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 447 +++++++++++++--------------- tests/unit/test_slurm_ops.py | 16 +- 2 files changed, 223 insertions(+), 240 deletions(-) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index d01ab95..3c126c1 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -52,13 +52,6 @@ def _on_install(self, _) -> None: __all__ = [ "SlurmOpsError", - "ServiceType", - "SlurmOpsManager", - "ServiceManager", - "MungeKeyManager", - "MungeManager", - "SnapManager", - "SlurmManagerBase", "SlurmctldManager", "SlurmdManager", "SlurmdbdManager", @@ -128,13 +121,14 @@ def _call( SlurmOpsError: Raised if the command fails. """ cmd = [cmd, *args] - _logger.debug(f"Executing command {cmd}") + _logger.debug(f"executing command {cmd}") result = subprocess.run(cmd, input=stdin, capture_output=True, text=True) if result.returncode != 0: _logger.error(f"command {cmd} failed with message {result.stderr}") if check: raise SlurmOpsError(f"command {cmd} failed. stderr:\n{result.stderr}") + return subprocess.CompletedProcess( args=result.args, stdout=result.stdout.strip() if result.stdout else None, @@ -147,12 +141,33 @@ def _snap(*args) -> str: """Control snap by via executed `snap ...` commands. Raises: - subprocess.CalledProcessError: Raised if snap command fails. + SlurmOpsError: Raised if snap command fails. 
""" return _call("snap", *args).stdout -class ServiceType(Enum): +def _systemctl(*args) -> str: + """Control systemd units via `systemctl ...` commands. + + Raises: + SlurmOpsError: Raised if systemctl command fails. + """ + return _call( + "systemctl", + *args, + ).stdout + + +def _mungectl(*args, stdin: Optional[str] = None) -> str: + """Control munge via `mungectl ...` commands. + + Raises: + SlurmOpsError: Raised if mungectl command fails. + """ + return _call("mungectl", *args, stdin=stdin).stdout + + +class _ServiceType(Enum): """Type of Slurm service to manage.""" MUNGED = "munged" @@ -165,78 +180,14 @@ class ServiceType(Enum): @property def config_name(self) -> str: """Configuration name on the slurm snap for this service type.""" - if self is ServiceType.SLURMCTLD: + if self is _ServiceType.SLURMCTLD: return "slurm" - if self is ServiceType.MUNGED: + if self is _ServiceType.MUNGED: return "munge" return self.value -class ServiceManager(ABC): - """Control a Slurm service.""" - - @abstractmethod - def __init__(self, service: ServiceType) -> None: ... - - @abstractmethod - def enable(self) -> None: - """Enable service.""" - - @abstractmethod - def disable(self) -> None: - """Disable service.""" - - @abstractmethod - def restart(self) -> None: - """Restart service.""" - - @abstractmethod - def active(self) -> bool: - """Return True if the service is active.""" - - @property - @abstractmethod - def type(self) -> ServiceType: - """Return the service type of the managed service.""" - - -class MungeKeyManager: - """Control the munge key.""" - - def _mungectl(self, *args: str, stdin: Optional[str] = None) -> str: - """Control munge via `mungectl ...`. - - Args: - *args: Arguments to pass to `mungectl`. - stdin: Input to pass to `mungectl` via stdin. - - Raises: - subprocess.CalledProcessError: Raised if `mungectl` command fails. - """ - return _call("mungectl", *args, stdin=stdin).stdout - - def get(self) -> str: - """Get the current munge key. 
- - Returns: - The current munge key as a base64-encoded string. - """ - return self._mungectl("key", "get") - - def set(self, key: str) -> None: - """Set a new munge key. - - Args: - key: A new, base64-encoded munge key. - """ - self._mungectl("key", "set", stdin=key) - - def generate(self) -> None: - """Generate a new, cryptographically secure munge key.""" - self._mungectl("key", "generate") - - class _EnvManager: """Control configuration of environment variables used in Slurm components. @@ -265,133 +216,64 @@ def unset(self, key: str) -> None: dotenv.unset_key(self._file, self._config_to_env_var(key)) -class SlurmOpsManager(ABC): - """Manager to control the installation, creation and configuration of Slurm-related services.""" +class _ServiceManager(ABC): + """Control a Slurm service.""" - @abstractmethod - def install(self) -> None: - """Install Slurm.""" + def __init__(self, service: _ServiceType) -> None: + self._service = service @abstractmethod - def version(self) -> str: - """Get the current version of Slurm installed on the system.""" + def enable(self) -> None: + """Enable service.""" - @property @abstractmethod - def slurm_path(self) -> Path: - """Get the path to the Slurm configuration directory.""" + def disable(self) -> None: + """Disable service.""" @abstractmethod - def service_manager_for(self, type: ServiceType) -> ServiceManager: - """Return the `ServiceManager` for the specified `ServiceType`.""" + def restart(self) -> None: + """Restart service.""" @abstractmethod - def _env_manager_for(self, type: ServiceType) -> _EnvManager: - """Return the `_EnvManager` for the specified `ServiceType`.""" - - -class MungeManager: - """Manage `munged` service operations.""" - - def __init__(self, ops_manager: SlurmOpsManager) -> None: - self.service = ops_manager.service_manager_for(ServiceType.MUNGED) - self.key = MungeKeyManager() - - -class PrometheusExporterManager: - """Manage `prometheus-slurm-exporter` service operations.""" - - def __init__(self, 
ops_manager: SlurmOpsManager) -> None: - self.service = ops_manager.service_manager_for(ServiceType.PROMETHEUS_EXPORTER) - - -class SlurmManagerBase: - """Base manager for Slurm services.""" - - def __init__(self, service: ServiceType, snap: bool = False) -> None: - self._ops_manager = SnapManager() if snap else AptManager() - self.service = self._ops_manager.service_manager_for(service) - self.munge = MungeManager(self._ops_manager) - self.exporter = PrometheusExporterManager(self._ops_manager) - self.install = self._ops_manager.install - self.version = self._ops_manager.version - - @property - def hostname(self) -> str: - """The hostname where this manager is running.""" - return socket.gethostname().split(".")[0] - - -class SlurmctldManager(SlurmManagerBase): - """Manager for the Slurmctld service.""" - - def __init__(self, *args, **kwargs) -> None: - super().__init__(service=ServiceType.SLURMCTLD, *args, **kwargs) - self._config_path = self._ops_manager.slurm_path / "slurm.conf" - - @contextmanager - def config(self) -> slurmconfig.SlurmConfig: - """Get the config manager of slurmctld.""" - with slurmconfig.edit(self._config_path) as config: - yield config - - -class SlurmdManager(SlurmManagerBase): - """Manager for the Slurmd service. - - This service will additionally provide some environment variables that need to be - passed through to the service in case the default service is overriden (e.g. a systemctl file override). - - - SLURMD_CONFIG_SERVER. Sets the `--conf-server` option for `slurmd`. 
- """ - - def __init__(self, *args, **kwargs) -> None: - super().__init__(service=ServiceType.SLURMD, *args, **kwargs) - self._env_manager = self._ops_manager._env_manager_for(ServiceType.SLURMD) + def active(self) -> bool: + """Return True if the service is active.""" @property - def config_server(self) -> str: - """Get the config server address of this Slurmd node.""" - return self._env_manager.get("CONFIG_SERVER") - - @config_server.setter - def config_server(self, addr: str) -> None: - """Set the config server address of this Slurmd node.""" - self._env_manager.set({"CONFIG_SERVER": addr}) - - @config_server.deleter - def config_server(self) -> None: - """Unset the config server address of this Slurmd node.""" - self._env_manager.unset("CONFIG_SERVER") + def type(self) -> _ServiceType: + """Return the service type of the managed service.""" + return self._service -class SlurmdbdManager(SlurmManagerBase): - """Manager for the Slurmdbd service.""" +class _SystemctlServiceManager(_ServiceManager): + """Control a Slurm service using systemctl services.""" - def __init__(self, *args, **kwargs) -> None: - super().__init__(service=ServiceType.SLURMDBD, *args, **kwargs) - self._config_path = self._ops_manager.slurm_path / "slurmdbd.conf" + def enable(self) -> None: + """Enable service. - @contextmanager - def config(self) -> slurmdbdconfig.SlurmdbdConfig: - """Get the config manager of slurmctld.""" - with slurmdbdconfig.edit(self._config_path) as config: - yield config + Raises: + SlurmOpsError: Raised if `systemctl enable ...` returns a non-zero returncode. 
+ """ + _systemctl("enable", "--now", self._service.value) + def disable(self) -> None: + """Disable service.""" + _systemctl("disable", "--now", self._service.value) -class SlurmrestdManager(SlurmManagerBase): - """Manager for the Slurmrestd service.""" + def restart(self) -> None: + """Restart service.""" + _systemctl("reload-or-restart", self._service.value) - def __init__(self, *args, **kwargs) -> None: - super().__init__(service=ServiceType.SLURMRESTD, *args, **kwargs) + def active(self) -> bool: + """Return True if the service is active.""" + return ( + _call("systemctl", "is-active", "--quiet", self._service.value, check=False).returncode + == 0 + ) -class _SnapServiceManager(ServiceManager): +class _SnapServiceManager(_ServiceManager): """Control a Slurm service.""" - def __init__(self, service: ServiceType) -> None: - self._service = service - def enable(self) -> None: """Enable service.""" _snap("start", "--enable", f"slurm.{self._service.value}") @@ -415,13 +297,33 @@ def active(self) -> bool: # We don't do `"active" in state` because the word "active" is also part of "inactive" :) return "inactive" not in services[f"slurm.{self._service.value}"] + +class _OpsManager(ABC): + """Manager to control the installation, creation and configuration of Slurm-related services.""" + + @abstractmethod + def install(self) -> None: + """Install Slurm.""" + + @abstractmethod + def version(self) -> str: + """Get the current version of Slurm installed on the system.""" + @property - def type(self) -> ServiceType: - """Return the service type of the managed service.""" - return self._service + @abstractmethod + def slurm_path(self) -> Path: + """Get the path to the Slurm configuration directory.""" + @abstractmethod + def service_manager_for(self, type: _ServiceType) -> _ServiceManager: + """Return the `ServiceManager` for the specified `ServiceType`.""" -class SnapManager(SlurmOpsManager): + @abstractmethod + def _env_manager_for(self, type: _ServiceType) -> _EnvManager: 
+ """Return the `_EnvManager` for the specified `ServiceType`.""" + + +class _SnapManager(_OpsManager): """Slurm ops manager that uses Snap as its package manager.""" def install(self) -> None: @@ -445,58 +347,16 @@ def slurm_path(self) -> Path: """Get the path to the Slurm configuration directory.""" return Path("/var/snap/slurm/common/etc/slurm") - def service_manager_for(self, type: ServiceType) -> ServiceManager: + def service_manager_for(self, type: _ServiceType) -> _ServiceManager: """Return the `ServiceManager` for the specified `ServiceType`.""" return _SnapServiceManager(type) - def _env_manager_for(self, type: ServiceType) -> _EnvManager: + def _env_manager_for(self, type: _ServiceType) -> _EnvManager: """Return the `_EnvManager` for the specified `ServiceType`.""" return _EnvManager(file="/var/snap/slurm/common/.env", prefix=type.value) -# ========================= deb manager ========================= - - -class _SystemctlServiceManager(ServiceManager): - """Control a Slurm service using systemctl services.""" - - def __init__(self, service: ServiceType) -> None: - def systemctl(*args) -> str: - return _call("systemctl", *args, service.value).stdout - - self._service = service - self._systemctl = systemctl - - def enable(self) -> None: - """Enable service. - - Raises: - SlurmOpsError: Raised if `systemctl enable ...` returns a non-zero returncode. 
- """ - self._systemctl("enable", "--now") - - def disable(self) -> None: - """Disable service.""" - self._systemctl("disable", "--now") - - def restart(self) -> None: - """Restart service.""" - self._systemctl("reload-or-restart") - - def active(self) -> bool: - """Return True if the service is active.""" - return ( - _call("systemctl", "is-active", "--quiet", self._service.value, check=False).returncode - == 0 - ) - - @property - def type(self) -> ServiceType: - """Return the service type of the managed service.""" - return self._service - - -class AptManager(SlurmOpsManager): +class _AptManager(_OpsManager): """Slurm ops manager that uses apt as its package manager. NOTE: This manager provides some environment variables that are automatically passed to the @@ -639,10 +499,133 @@ def slurm_path(self) -> Path: """Get the path to the Slurm configuration directory.""" return Path("/etc/slurm") - def service_manager_for(self, type: ServiceType) -> ServiceManager: + def service_manager_for(self, type: _ServiceType) -> _ServiceManager: """Return the `ServiceManager` for the specified `ServiceType`.""" return _SystemctlServiceManager(type) - def _env_manager_for(self, type: ServiceType) -> _EnvManager: + def _env_manager_for(self, type: _ServiceType) -> _EnvManager: """Return the `_EnvManager` for the specified `ServiceType`.""" return _EnvManager(file=self._ENV_FILE, prefix=type.value) + + +class _MungeKeyManager: + """Control the munge key via `mungectl ...` commands.""" + + @staticmethod + def get() -> str: + """Get the current munge key. + + Returns: + The current munge key as a base64-encoded string. + """ + return _mungectl("key", "get") + + @staticmethod + def set(key: str) -> None: + """Set a new munge key. + + Args: + key: A new, base64-encoded munge key. 
+ """ + _mungectl("key", "set", stdin=key) + + @staticmethod + def generate() -> None: + """Generate a new, cryptographically secure munge key.""" + _mungectl("key", "generate") + + +class _MungeManager: + """Manage `munged` service operations.""" + + def __init__(self, ops_manager: _OpsManager) -> None: + self.service = ops_manager.service_manager_for(_ServiceType.MUNGED) + self.key = _MungeKeyManager() + + +class _PrometheusExporterManager: + """Manage `prometheus-slurm-exporter` service operations.""" + + def __init__(self, ops_manager: _OpsManager) -> None: + self.service = ops_manager.service_manager_for(_ServiceType.PROMETHEUS_EXPORTER) + + +class _SlurmManagerBase: + """Base manager for Slurm services.""" + + def __init__(self, service: _ServiceType, snap: bool = False) -> None: + self._ops_manager = _SnapManager() if snap else _AptManager() + self.service = self._ops_manager.service_manager_for(service) + self.munge = _MungeManager(self._ops_manager) + self.exporter = _PrometheusExporterManager(self._ops_manager) + self.install = self._ops_manager.install + self.version = self._ops_manager.version + + @property + def hostname(self) -> str: + """The hostname where this manager is running.""" + return socket.gethostname().split(".")[0] + + +class SlurmctldManager(_SlurmManagerBase): + """Manager for the Slurmctld service.""" + + def __init__(self, *args, **kwargs) -> None: + super().__init__(service=_ServiceType.SLURMCTLD, *args, **kwargs) + self._config_path = self._ops_manager.slurm_path / "slurm.conf" + + @contextmanager + def config(self) -> slurmconfig.SlurmConfig: + """Get the config manager of slurmctld.""" + with slurmconfig.edit(self._config_path) as config: + yield config + + +class SlurmdManager(_SlurmManagerBase): + """Manager for the Slurmd service. + + This service will additionally provide some environment variables that need to be + passed through to the service in case the default service is overriden (e.g. a systemctl file override). 
+ + - SLURMD_CONFIG_SERVER. Sets the `--conf-server` option for `slurmd`. + """ + + def __init__(self, *args, **kwargs) -> None: + super().__init__(service=_ServiceType.SLURMD, *args, **kwargs) + self._env_manager = self._ops_manager._env_manager_for(_ServiceType.SLURMD) + + @property + def config_server(self) -> str: + """Get the config server address of this Slurmd node.""" + return self._env_manager.get("CONFIG_SERVER") + + @config_server.setter + def config_server(self, addr: str) -> None: + """Set the config server address of this Slurmd node.""" + self._env_manager.set({"CONFIG_SERVER": addr}) + + @config_server.deleter + def config_server(self) -> None: + """Unset the config server address of this Slurmd node.""" + self._env_manager.unset("CONFIG_SERVER") + + +class SlurmdbdManager(_SlurmManagerBase): + """Manager for the Slurmdbd service.""" + + def __init__(self, *args, **kwargs) -> None: + super().__init__(service=_ServiceType.SLURMDBD, *args, **kwargs) + self._config_path = self._ops_manager.slurm_path / "slurmdbd.conf" + + @contextmanager + def config(self) -> slurmdbdconfig.SlurmdbdConfig: + """Get the config manager of slurmctld.""" + with slurmdbdconfig.edit(self._config_path) as config: + yield config + + +class SlurmrestdManager(_SlurmManagerBase): + """Manager for the Slurmrestd service.""" + + def __init__(self, *args, **kwargs) -> None: + super().__init__(service=_ServiceType.SLURMRESTD, *args, **kwargs) diff --git a/tests/unit/test_slurm_ops.py b/tests/unit/test_slurm_ops.py index 022238a..14bbbbc 100644 --- a/tests/unit/test_slurm_ops.py +++ b/tests/unit/test_slurm_ops.py @@ -13,13 +13,13 @@ import charms.hpc_libs.v0.slurm_ops as slurm import dotenv from charms.hpc_libs.v0.slurm_ops import ( - ServiceType, SlurmctldManager, SlurmdbdManager, SlurmdManager, - SlurmManagerBase, SlurmOpsError, - SnapManager, + _ServiceType, + _SlurmManagerBase, + _SnapManager, ) from pyfakefs.fake_filesystem_unittest import TestCase as FsTestCase @@ -86,7 +86,7 @@ 
def test_error_message(self, *_) -> None: ) class TestSnapPackageManager(FsTestCase): def setUp(self): - self.manager = SnapManager() + self.manager = _SnapManager() self.setUpPyfakefs() self.fs.create_file("/var/snap/slurm/common/.env") @@ -213,10 +213,10 @@ def test_hostname(self, gethostname, *_) -> None: parameters = [ - (SlurmManagerBase(ServiceType.SLURMCTLD, snap=True), "slurm"), - (SlurmManagerBase(ServiceType.SLURMD, snap=True), "slurmd"), - (SlurmManagerBase(ServiceType.SLURMDBD, snap=True), "slurmdbd"), - (SlurmManagerBase(ServiceType.SLURMRESTD, snap=True), "slurmrestd"), + (_SlurmManagerBase(_ServiceType.SLURMCTLD, snap=True), "slurm"), + (_SlurmManagerBase(_ServiceType.SLURMD, snap=True), "slurmd"), + (_SlurmManagerBase(_ServiceType.SLURMDBD, snap=True), "slurmdbd"), + (_SlurmManagerBase(_ServiceType.SLURMRESTD, snap=True), "slurmrestd"), ] for manager, config_name in parameters: From 2299c0c5ae430f17225ee43f34259c5ebb55aa3b Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Wed, 18 Sep 2024 13:52:13 -0400 Subject: [PATCH 2/3] feat(slurm_ops): add `scontrol` method to `_SlurmManagerBase` Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index 3c126c1..7de238a 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -566,6 +566,15 @@ def hostname(self) -> str: """The hostname where this manager is running.""" return socket.gethostname().split(".")[0] + @staticmethod + def scontrol(*args) -> str: + """Control Slurm via `scontrol` commands. + + Raises: + SlurmOpsError: Raised if scontrol command fails. + """ + return _call("scontrol", *args).stdout + class SlurmctldManager(_SlurmManagerBase): """Manager for the Slurmctld service.""" From d58148669e7257e01614bb8af536d11fb5c315e1 Mon Sep 17 00:00:00 2001 From: "Jason C. 
Nucciarone" Date: Wed, 18 Sep 2024 13:53:18 -0400 Subject: [PATCH 3/3] tests(slurm_ops): add tests for `scontrol` method Signed-off-by: Jason C. Nucciarone --- tests/unit/test_slurm_ops.py | 181 +++++++++++++++++++---------------- 1 file changed, 97 insertions(+), 84 deletions(-) diff --git a/tests/unit/test_slurm_ops.py b/tests/unit/test_slurm_ops.py index 14bbbbc..c72ce2d 100644 --- a/tests/unit/test_slurm_ops.py +++ b/tests/unit/test_slurm_ops.py @@ -6,6 +6,7 @@ import base64 import subprocess +import textwrap from pathlib import Path from unittest import TestCase from unittest.mock import patch @@ -211,6 +212,12 @@ def test_hostname(self, gethostname, *_) -> None: gethostname.return_value = "machine.domain.com" self.assertEqual(self.manager.hostname, "machine") + def test_scontrol(self, subcmd) -> None: + """Test that manager correctly calls scontrol.""" + self.manager.scontrol("reconfigure") + args = subcmd.call_args[0][0] + self.assertEqual(args, ["scontrol", "reconfigure"]) + parameters = [ (_SlurmManagerBase(_ServiceType.SLURMCTLD, snap=True), "slurm"), @@ -235,55 +242,58 @@ def test_hostname(self, gethostname, *_) -> None: class TestSlurmctldConfig(FsTestCase): """Test the Slurmctld service config manager.""" - EXAMPLE_SLURM_CONF = """# -# `slurm.conf` file generated at 2024-01-30 17:18:36.171652 by slurmutils. 
-# -SlurmctldHost=juju-c9fc6f-0(10.152.28.20) -SlurmctldHost=juju-c9fc6f-1(10.152.28.100) - -ClusterName=charmed-hpc -AuthType=auth/munge -Epilog=/usr/local/slurm/epilog -Prolog=/usr/local/slurm/prolog -FirstJobId=65536 -InactiveLimit=120 -JobCompType=jobcomp/filetxt -JobCompLoc=/var/log/slurm/jobcomp -KillWait=30 -MaxJobCount=10000 -MinJobAge=3600 -PluginDir=/usr/local/lib:/usr/local/slurm/lib -ReturnToService=0 -SchedulerType=sched/backfill -SlurmctldLogFile=/var/log/slurm/slurmctld.log -SlurmdLogFile=/var/log/slurm/slurmd.log -SlurmctldPort=7002 -SlurmdPort=7003 -SlurmdSpoolDir=/var/spool/slurmd.spool -StateSaveLocation=/var/spool/slurm.state -SwitchType=switch/none -TmpFS=/tmp -WaitTime=30 - -# -# Node configurations -# -NodeName=juju-c9fc6f-2 NodeAddr=10.152.28.48 CPUs=1 RealMemory=1000 TmpDisk=10000 -NodeName=juju-c9fc6f-3 NodeAddr=10.152.28.49 CPUs=1 RealMemory=1000 TmpDisk=10000 -NodeName=juju-c9fc6f-4 NodeAddr=10.152.28.50 CPUs=1 RealMemory=1000 TmpDisk=10000 -NodeName=juju-c9fc6f-5 NodeAddr=10.152.28.51 CPUs=1 RealMemory=1000 TmpDisk=10000 - -# -# Down node configurations -# -DownNodes=juju-c9fc6f-5 State=DOWN Reason="Maintenance Mode" - -# -# Partition configurations -# -PartitionName=DEFAULT MaxTime=30 MaxNodes=10 State=UP -PartitionName=batch Nodes=juju-c9fc6f-2,juju-c9fc6f-3,juju-c9fc6f-4,juju-c9fc6f-5 MinNodes=4 MaxTime=120 AllowGroups=admin -""" + EXAMPLE_SLURM_CONF = textwrap.dedent( + """ + # + # `slurm.conf` file generated at 2024-01-30 17:18:36.171652 by slurmutils. 
+ # + SlurmctldHost=juju-c9fc6f-0(10.152.28.20) + SlurmctldHost=juju-c9fc6f-1(10.152.28.100) + + ClusterName=charmed-hpc + AuthType=auth/munge + Epilog=/usr/local/slurm/epilog + Prolog=/usr/local/slurm/prolog + FirstJobId=65536 + InactiveLimit=120 + JobCompType=jobcomp/filetxt + JobCompLoc=/var/log/slurm/jobcomp + KillWait=30 + MaxJobCount=10000 + MinJobAge=3600 + PluginDir=/usr/local/lib:/usr/local/slurm/lib + ReturnToService=0 + SchedulerType=sched/backfill + SlurmctldLogFile=/var/log/slurm/slurmctld.log + SlurmdLogFile=/var/log/slurm/slurmd.log + SlurmctldPort=7002 + SlurmdPort=7003 + SlurmdSpoolDir=/var/spool/slurmd.spool + StateSaveLocation=/var/spool/slurm.state + SwitchType=switch/none + TmpFS=/tmp + WaitTime=30 + + # + # Node configurations + # + NodeName=juju-c9fc6f-2 NodeAddr=10.152.28.48 CPUs=1 RealMemory=1000 TmpDisk=10000 + NodeName=juju-c9fc6f-3 NodeAddr=10.152.28.49 CPUs=1 RealMemory=1000 TmpDisk=10000 + NodeName=juju-c9fc6f-4 NodeAddr=10.152.28.50 CPUs=1 RealMemory=1000 TmpDisk=10000 + NodeName=juju-c9fc6f-5 NodeAddr=10.152.28.51 CPUs=1 RealMemory=1000 TmpDisk=10000 + + # + # Down node configurations + # + DownNodes=juju-c9fc6f-5 State=DOWN Reason="Maintenance Mode" + + # + # Partition configurations + # + PartitionName=DEFAULT MaxTime=30 MaxNodes=10 State=UP + PartitionName=batch Nodes=juju-c9fc6f-2,juju-c9fc6f-3,juju-c9fc6f-4,juju-c9fc6f-5 MinNodes=4 MaxTime=120 AllowGroups=admin + """ + ).strip() def setUp(self): self.manager = SlurmctldManager(snap=True) @@ -327,41 +337,44 @@ def test_config(self, *_) -> None: class TestSlurmdbdConfig(FsTestCase): """Test the Slurmdbd service config manager.""" - EXAMPLE_SLURMDBD_CONF = """# -# `slurmdbd.conf` file generated at 2024-01-30 17:18:36.171652 by slurmutils. 
-# -ArchiveEvents=yes -ArchiveJobs=yes -ArchiveResvs=yes -ArchiveSteps=no -ArchiveTXN=no -ArchiveUsage=no -ArchiveScript=/usr/sbin/slurm.dbd.archive -AuthInfo=/var/run/munge/munge.socket.2 -AuthType=auth/munge -AuthAltTypes=auth/jwt -AuthAltParameters=jwt_key=16549684561684@ -DbdHost=slurmdbd-0 -DbdBackupHost=slurmdbd-1 -DebugLevel=info -PluginDir=/all/these/cool/plugins -PurgeEventAfter=1month -PurgeJobAfter=12month -PurgeResvAfter=1month -PurgeStepAfter=1month -PurgeSuspendAfter=1month -PurgeTXNAfter=12month -PurgeUsageAfter=24month -LogFile=/var/log/slurmdbd.log -PidFile=/var/run/slurmdbd.pid -SlurmUser=slurm -StoragePass=supersecretpasswd -StorageType=accounting_storage/mysql -StorageUser=slurm -StorageHost=127.0.0.1 -StoragePort=3306 -StorageLoc=slurm_acct_db -""" + EXAMPLE_SLURMDBD_CONF = textwrap.dedent( + """ + # + # `slurmdbd.conf` file generated at 2024-01-30 17:18:36.171652 by slurmutils. + # + ArchiveEvents=yes + ArchiveJobs=yes + ArchiveResvs=yes + ArchiveSteps=no + ArchiveTXN=no + ArchiveUsage=no + ArchiveScript=/usr/sbin/slurm.dbd.archive + AuthInfo=/var/run/munge/munge.socket.2 + AuthType=auth/munge + AuthAltTypes=auth/jwt + AuthAltParameters=jwt_key=16549684561684@ + DbdHost=slurmdbd-0 + DbdBackupHost=slurmdbd-1 + DebugLevel=info + PluginDir=/all/these/cool/plugins + PurgeEventAfter=1month + PurgeJobAfter=12month + PurgeResvAfter=1month + PurgeStepAfter=1month + PurgeSuspendAfter=1month + PurgeTXNAfter=12month + PurgeUsageAfter=24month + LogFile=/var/log/slurmdbd.log + PidFile=/var/run/slurmdbd.pid + SlurmUser=slurm + StoragePass=supersecretpasswd + StorageType=accounting_storage/mysql + StorageUser=slurm + StorageHost=127.0.0.1 + StoragePort=3306 + StorageLoc=slurm_acct_db + """ + ).strip() def setUp(self): self.manager = SlurmdbdManager(snap=True)