From 1312b9f59e0d6151dfabe12c7237ac4ade81fe2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Juli=C3=A1n=20Espina?= Date: Tue, 3 Sep 2024 18:16:59 -0600 Subject: [PATCH] WIP: implement `_SnapFsConfigManager` using slurmutils --- dev-requirements.txt | 8 + lib/charms/hpc_libs/v0/slurm_ops.py | 86 ++++++++-- requirements.txt | 3 - tests/unit/test_slurm_ops.py | 257 +++++++++++++++++++--------- tox.ini | 4 +- 5 files changed, 249 insertions(+), 109 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 214054c..7d8070d 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,2 +1,10 @@ +# lib deps +slurmutils ~= 0.6.0 +python-dotenv ~= 1.0.1 +pyyaml >= 6.0.2 + +# tests deps +coverage[toml] ~= 7.6 +pyfakefs ~= 5.6.0 pytest ~= 7.2 pytest-order ~= 1.1 diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index 0c19fb1..5547a6b 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -67,19 +67,24 @@ def _on_install(self, _) -> None: "SnapManager", ] -import json import logging +import os import re import socket import subprocess from abc import ABC, abstractmethod from collections.abc import Mapping +from contextlib import AbstractContextManager from enum import Enum +from functools import partial from pathlib import Path -from typing import Any, Optional, Type +from typing import Any, Callable, Optional, Type, TypeAlias import dotenv import yaml +from slurmutils.editors import slurmconfig, slurmdbdconfig +from slurmutils.exceptions import ModelError +from slurmutils.models.model import BaseModel as SlurmBaseConfig # The unique Charmhub library identifier, never change it LIBID = "541fd767f90b40539cf7cd6e7db8fabf" @@ -92,7 +97,7 @@ def _on_install(self, _) -> None: LIBPATCH = 6 # Charm library dependencies to fetch during `charmcraft pack`. -PYDEPS = ["pyyaml>=6.0.2", "python-dotenv~=1.0.1"] +PYDEPS = ["pyyaml>=6.0.2", "python-dotenv~=1.0.1", "slurmutils~=0.6.0"] _logger = logging.getLogger(__name__) _acronym = re.compile(r"(?<=[A-Z])(?=[A-Z][a-z])") @@ -381,7 +386,7 @@ def get(self, key: Optional[str] = None) -> Any: def set(self, config: Mapping[str, Any]) -> None: """Set configuration for Slurm component.""" - # Sanity check to avoid modifying the .env if the input keys are invalid. + # Check to avoid modifying the .env if the input keys are invalid. for key in config.keys(): if key not in self._keys: raise SlurmOpsError( @@ -393,7 +398,7 @@ def set(self, config: Mapping[str, Any]) -> None: def unset(self, *keys: str) -> None: """Unset configuration for Slurm component.""" - # Sanity check to avoid modifying the .env if the input keys are invalid. + # Check to avoid modifying the .env if the input keys are invalid. for key in keys: if key not in self._keys: raise SlurmOpsError( @@ -407,27 +412,64 @@ def unset(self, *keys: str) -> None: dotenv.unset_key(self._DOTENV_FILE, self.config_to_env_var(key)) -class _SnapConfigManager(ConfigManager): - """Control configuration of a Slurm component using Snap.""" +_EditorFetcher: TypeAlias = Callable[[], AbstractContextManager[SlurmBaseConfig]] +_ConfigFetcher: TypeAlias = Callable[[], SlurmBaseConfig] +_ConfigDeleter: TypeAlias = Callable[[], list] - def __init__(self, service: ServiceType) -> None: - self._name = service.config_name + +# TODO: probably move this abstraction (or something similar that enables the same) to slurmutils +class _SnapFsConfigManager(ConfigManager, ABC): + """Control configuration of a Slurm component using a file.""" + + def __init__( + self, editor: _EditorFetcher, config: _ConfigFetcher, deleter=_ConfigDeleter + ) -> None: + self._editor_fetcher = editor + self._config_fetcher = config + self._config_deleter = deleter def get(self, key: Optional[str] = None) -> Any: """Get specific configuration value for Slurm component.""" - key = f"{self._name}.{key}" if key else self._name - config = json.loads(_snap("get", "-d", "slurm", key)) - return config[key] + config = self._config_fetcher().dict() + + try: + for prop in key.split("."): + if isinstance(config, dict): + config = config[prop] + elif isinstance(config, list): + config = config[int(prop)] + else: + raise SlurmOpsError(f"invalid configuration key `{key}`") + return config + except ModelError as e: + raise SlurmOpsError(e.message) + except KeyError: + raise SlurmOpsError(f"invalid configuration key `{key}`") def set(self, config: Mapping[str, Any]) -> None: """Set configuration for Slurm component.""" - args = [f"{self._name}.{k}={json.dumps(v)}" for k, v in config.items()] - _snap("set", "slurm", *args) + with self._config_fetcher() as current_config: + new_config = current_config.from_dict(config) + current_config.update(new_config) def unset(self, *keys: str) -> None: """Unset configuration for Slurm component.""" - args = [f"{self._name}.{k}" for k in keys] if len(keys) > 0 else [self._name] - _snap("unset", "slurm", *args) + if len(keys) == 0: + self._config_deleter() + return + + with self._config_fetcher() as current_config: + for key in keys: + rest, last = key.rsplit(".", 1) + base = current_config + try: + for prop in rest.split("."): + base = getattr(base, prop) + del base[last] + except ModelError as e: + raise SlurmOpsError(e.message) + except KeyError: + raise SlurmOpsError(f"invalid configuration key `{key}`") class _SnapMungeKeyManager(MungeKeyManager): @@ -470,8 +512,16 @@ class SnapManager(SlurmOpsManager): """Slurm ops manager that uses Snap as its package manager.""" _CONFIG_SERVICES = { - ServiceType.SLURMCTLD: _SnapConfigManager(ServiceType.SLURMCTLD), - ServiceType.SLURMDBD: _SnapConfigManager(ServiceType.SLURMDBD), + ServiceType.SLURMCTLD: _SnapFsConfigManager( + editor=partial(slurmconfig.edit, "/var/snap/slurm/common/etc/slurm/slurm.conf"), + config=partial(slurmconfig.load, "/var/snap/slurm/common/etc/slurm/slurm.conf"), + deleter=partial(os.remove, "/var/snap/slurm/common/etc/slurm/slurm.conf"), + ), + ServiceType.SLURMDBD: _SnapFsConfigManager( + editor=partial(slurmdbdconfig.edit, "/var/snap/slurm/common/etc/slurm/slurmdbd.conf"), + config=partial(slurmdbdconfig.load, "/var/snap/slurm/common/etc/slurm/slurmdbd.conf"), + deleter=partial(os.remove, "/var/snap/slurm/common/etc/slurm/slurmdbd.conf"), + ), ServiceType.MUNGED: _SnapEnvConfigManager(ServiceType.MUNGED.value, ["max-thread-count"]), ServiceType.SLURMD: _SnapEnvConfigManager(ServiceType.SLURMD.value, ["config-server"]), ServiceType.PROMETHEUS_EXPORTER: _SnapEnvConfigManager( diff --git a/requirements.txt b/requirements.txt index e276adf..618ba75 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1 @@ ops ~= 2.5 -slurmutils ~= 0.4.0 -python-dotenv ~= 1.0.1 -pyyaml >= 6.0.2 diff --git a/tests/unit/test_slurm_ops.py b/tests/unit/test_slurm_ops.py index fbecf60..b397d8b 100644 --- a/tests/unit/test_slurm_ops.py +++ b/tests/unit/test_slurm_ops.py @@ -223,112 +223,199 @@ def test_hostname(self, gethostname, *_) -> None: class TestSlurmctldConfig(FsTestCase): """Test the Slurmctld service config manager.""" + EXAMPLE_SLURM_CONF = """# +# `slurm.conf` file generated at 2024-01-30 17:18:36.171652 by slurmutils. +# +SlurmctldHost=juju-c9fc6f-0(10.152.28.20) +SlurmctldHost=juju-c9fc6f-1(10.152.28.100) + +ClusterName=charmed-hpc +AuthType=auth/munge +Epilog=/usr/local/slurm/epilog +Prolog=/usr/local/slurm/prolog +FirstJobId=65536 +InactiveLimit=120 +JobCompType=jobcomp/filetxt +JobCompLoc=/var/log/slurm/jobcomp +KillWait=30 +MaxJobCount=10000 +MinJobAge=3600 +PluginDir=/usr/local/lib:/usr/local/slurm/lib +ReturnToService=0 +SchedulerType=sched/backfill +SlurmctldLogFile=/var/log/slurm/slurmctld.log +SlurmdLogFile=/var/log/slurm/slurmd.log +SlurmctldPort=7002 +SlurmdPort=7003 +SlurmdSpoolDir=/var/spool/slurmd.spool +StateSaveLocation=/var/spool/slurm.state +SwitchType=switch/none +TmpFS=/tmp +WaitTime=30 + +# +# Node configurations +# +NodeName=juju-c9fc6f-2 NodeAddr=10.152.28.48 CPUs=1 RealMemory=1000 TmpDisk=10000 +NodeName=juju-c9fc6f-3 NodeAddr=10.152.28.49 CPUs=1 RealMemory=1000 TmpDisk=10000 +NodeName=juju-c9fc6f-4 NodeAddr=10.152.28.50 CPUs=1 RealMemory=1000 TmpDisk=10000 +NodeName=juju-c9fc6f-5 NodeAddr=10.152.28.51 CPUs=1 RealMemory=1000 TmpDisk=10000 + +# +# Down node configurations +# +DownNodes=juju-c9fc6f-5 State=DOWN Reason="Maintenance Mode" + +# +# Partition configurations +# +PartitionName=DEFAULT MaxTime=30 MaxNodes=10 State=UP +PartitionName=batch Nodes=juju-c9fc6f-2,juju-c9fc6f-3,juju-c9fc6f-4,juju-c9fc6f-5 MinNodes=4 MaxTime=120 AllowGroups=admin +""" + def setUp(self): self.manager = SlurmManagerBase(ServiceType.SLURMCTLD, snap=True) self.config_name = "slurm" self.setUpPyfakefs() self.fs.create_file("/var/snap/slurm/common/.env") + self.fs.create_file( + "/var/snap/slurm/common/etc/slurm/slurm.conf", contents=self.EXAMPLE_SLURM_CONF + ) def test_get_options(self, subcmd) -> None: """Test that the manager correctly collects all requested configuration options.""" - subcmd.return_value = '{"%(name)s.key1": "value1", "%(name)s.key2": "value2"}' % { - "name": self.config_name - } - value = self.manager.config.get_options("key1", "key2") - calls = [args[0][0] for args in subcmd.call_args_list] - self.assertEqual(calls[0], ["snap", "get", "-d", "slurm", f"{self.config_name}.key1"]) - self.assertEqual(calls[1], ["snap", "get", "-d", "slurm", f"{self.config_name}.key2"]) - self.assertEqual(value, {"key1": "value1", "key2": "value2"}) - - def test_get_config(self, subcmd, *_) -> None: - """Test that the manager calls the correct `snap get ...` command.""" - subcmd.return_value = '{"%s.key": "value"}' % self.config_name - value = self.manager.config.get("key") - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "get", "-d", "slurm", f"{self.config_name}.key"]) - self.assertEqual(value, "value") - - def test_get_config_all(self, subcmd) -> None: - """Test that manager calls the correct `snap get ...` with no arguments given.""" - subcmd.return_value = '{"%s": "value"}' % self.config_name - value = self.manager.config.get() - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "get", "-d", "slurm", self.config_name]) - self.assertEqual(value, "value") - - def test_set_config(self, subcmd, *_) -> None: - """Test that the manager calls the correct `snap set ...` command.""" - self.manager.config.set({"key": "value"}) - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "set", "slurm", f'{self.config_name}.key="value"']) + value = self.manager.config.get_options( + "SlurmdLogFile", "Nodes.juju-c9fc6f-2.NodeAddr", "DownNodes.0.State" + ) - def test_unset_config(self, subcmd) -> None: - """Test that the manager calls the correct `snap unset ...` command.""" - self.manager.config.unset("key") - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "unset", "slurm", f"{self.config_name}.key"]) - - def test_unset_config_all(self, subcmd) -> None: - """Test the manager calls the correct `snap unset ...` with no arguments given.""" - self.manager.config.unset() - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "unset", "slurm", self.config_name]) + # def test_get_config(self, subcmd, *_) -> None: + # """Test that the manager calls the correct `snap get ...` command.""" + # subcmd.return_value = '{"%s.key": "value"}' % self.config_name + # value = self.manager.config.get("key") + # args = subcmd.call_args[0][0] + # self.assertEqual(args, ["snap", "get", "-d", "slurm", f"{self.config_name}.key"]) + # self.assertEqual(value, "value") + + # def test_get_config_all(self, subcmd) -> None: + # """Test that manager calls the correct `snap get ...` with no arguments given.""" + # subcmd.return_value = '{"%s": "value"}' % self.config_name + # value = self.manager.config.get() + # args = subcmd.call_args[0][0] + # self.assertEqual(args, ["snap", "get", "-d", "slurm", self.config_name]) + # self.assertEqual(value, "value") + + # def test_set_config(self, subcmd, *_) -> None: + # """Test that the manager calls the correct `snap set ...` command.""" + # self.manager.config.set({"key": "value"}) + # args = subcmd.call_args[0][0] + # self.assertEqual(args, ["snap", "set", "slurm", f'{self.config_name}.key="value"']) + + # def test_unset_config(self, subcmd) -> None: + # """Test that the manager calls the correct `snap unset ...` command.""" + # self.manager.config.unset("key") + # args = subcmd.call_args[0][0] + # self.assertEqual(args, ["snap", "unset", "slurm", f"{self.config_name}.key"]) + + # def test_unset_config_all(self, subcmd) -> None: + # """Test the manager calls the correct `snap unset ...` with no arguments given.""" + # self.manager.config.unset() + # args = subcmd.call_args[0][0] + # self.assertEqual(args, ["snap", "unset", "slurm", self.config_name]) @patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output") class TestSlurmdbdConfig(FsTestCase): """Test the Slurmdbd service config manager.""" + EXAMPLE_SLURMDBD_CONF = """# +# `slurmdbd.conf` file generated at 2024-01-30 17:18:36.171652 by slurmutils. +# +ArchiveEvents=yes +ArchiveJobs=yes +ArchiveResvs=yes +ArchiveSteps=no +ArchiveTXN=no +ArchiveUsage=no +ArchiveScript=/usr/sbin/slurm.dbd.archive +AuthInfo=/var/run/munge/munge.socket.2 +AuthType=auth/munge +AuthAltTypes=auth/jwt +AuthAltParameters=jwt_key=16549684561684@ +DbdHost=slurmdbd-0 +DbdBackupHost=slurmdbd-1 +DebugLevel=info +PluginDir=/all/these/cool/plugins +PurgeEventAfter=1month +PurgeJobAfter=12month +PurgeResvAfter=1month +PurgeStepAfter=1month +PurgeSuspendAfter=1month +PurgeTXNAfter=12month +PurgeUsageAfter=24month +LogFile=/var/log/slurmdbd.log +PidFile=/var/run/slurmdbd.pid +SlurmUser=slurm +StoragePass=supersecretpasswd +StorageType=accounting_storage/mysql +StorageUser=slurm +StorageHost=127.0.0.1 +StoragePort=3306 +StorageLoc=slurm_acct_db +""" + def setUp(self): self.manager = SlurmManagerBase(ServiceType.SLURMDBD, snap=True) self.config_name = "slurmdbd" self.setUpPyfakefs() self.fs.create_file("/var/snap/slurm/common/.env") + self.fs.create_file( + "/var/snap/slurm/common/etc/slurm/slurmdbd.conf", contents=self.EXAMPLE_SLURMDBD_CONF + ) - def test_get_options(self, subcmd) -> None: - """Test that the manager correctly collects all requested configuration options.""" - subcmd.return_value = '{"%(name)s.key1": "value1", "%(name)s.key2": "value2"}' % { - "name": self.config_name - } - value = self.manager.config.get_options("key1", "key2") - calls = [args[0][0] for args in subcmd.call_args_list] - self.assertEqual(calls[0], ["snap", "get", "-d", "slurm", f"{self.config_name}.key1"]) - self.assertEqual(calls[1], ["snap", "get", "-d", "slurm", f"{self.config_name}.key2"]) - self.assertEqual(value, {"key1": "value1", "key2": "value2"}) - - def test_get_config(self, subcmd, *_) -> None: - """Test that the manager calls the correct `snap get ...` command.""" - subcmd.return_value = '{"%s.key": "value"}' % self.config_name - value = self.manager.config.get("key") - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "get", "-d", "slurm", f"{self.config_name}.key"]) - self.assertEqual(value, "value") - - def test_get_config_all(self, subcmd) -> None: - """Test that manager calls the correct `snap get ...` with no arguments given.""" - subcmd.return_value = '{"%s": "value"}' % self.config_name - value = self.manager.config.get() - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "get", "-d", "slurm", self.config_name]) - self.assertEqual(value, "value") - - def test_set_config(self, subcmd, *_) -> None: - """Test that the manager calls the correct `snap set ...` command.""" - self.manager.config.set({"key": "value"}) - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "set", "slurm", f'{self.config_name}.key="value"']) - - def test_unset_config(self, subcmd) -> None: - """Test that the manager calls the correct `snap unset ...` command.""" - self.manager.config.unset("key") - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "unset", "slurm", f"{self.config_name}.key"]) - - def test_unset_config_all(self, subcmd) -> None: - """Test the manager calls the correct `snap unset ...` with no arguments given.""" - self.manager.config.unset() - args = subcmd.call_args[0][0] - self.assertEqual(args, ["snap", "unset", "slurm", self.config_name]) + # def test_get_options(self, subcmd) -> None: + # """Test that the manager correctly collects all requested configuration options.""" + # subcmd.return_value = '{"%(name)s.key1": "value1", "%(name)s.key2": "value2"}' % { + # "name": self.config_name + # } + # value = self.manager.config.get_options("key1", "key2") + # calls = [args[0][0] for args in subcmd.call_args_list] + # self.assertEqual(calls[0], ["snap", "get", "-d", "slurm", f"{self.config_name}.key1"]) + # self.assertEqual(calls[1], ["snap", "get", "-d", "slurm", f"{self.config_name}.key2"]) + # self.assertEqual(value, {"key1": "value1", "key2": "value2"}) + + # def test_get_config(self, subcmd, *_) -> None: + # """Test that the manager calls the correct `snap get ...` command.""" + # subcmd.return_value = '{"%s.key": "value"}' % self.config_name + # value = self.manager.config.get("key") + # args = subcmd.call_args[0][0] + # self.assertEqual(args, ["snap", "get", "-d", "slurm", f"{self.config_name}.key"]) + # self.assertEqual(value, "value") + + # def test_get_config_all(self, subcmd) -> None: + # """Test that manager calls the correct `snap get ...` with no arguments given.""" + # subcmd.return_value = '{"%s": "value"}' % self.config_name + # value = self.manager.config.get() + # args = subcmd.call_args[0][0] + # self.assertEqual(args, ["snap", "get", "-d", "slurm", self.config_name]) + # self.assertEqual(value, "value") + + # def test_set_config(self, subcmd, *_) -> None: + # """Test that the manager calls the correct `snap set ...` command.""" + # self.manager.config.set({"key": "value"}) + # args = subcmd.call_args[0][0] + # self.assertEqual(args, ["snap", "set", "slurm", f'{self.config_name}.key="value"']) + + # def test_unset_config(self, subcmd) -> None: + # """Test that the manager calls the correct `snap unset ...` command.""" + # self.manager.config.unset("key") + # args = subcmd.call_args[0][0] + # self.assertEqual(args, ["snap", "unset", "slurm", f"{self.config_name}.key"]) + + # def test_unset_config_all(self, subcmd) -> None: + # """Test the manager calls the correct `snap unset ...` with no arguments given.""" + # self.manager.config.unset() + # args = subcmd.call_args[0][0] + # self.assertEqual(args, ["snap", "unset", "slurm", self.config_name]) @patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output") diff --git a/tox.ini b/tox.ini index 3bac066..577a3b4 100644 --- a/tox.ini +++ b/tox.ini @@ -49,9 +49,7 @@ commands = [testenv:unit] description = Run unit tests deps = - pytest - pyfakefs - coverage[toml] + -r {tox_root}/dev-requirements.txt -r {tox_root}/requirements.txt commands = coverage run --source={[vars]lib_path} \