From e76c8e35f0bab2417d499ed896f2e1be94e3b64d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Juli=C3=A1n=20Espina?= Date: Thu, 22 Aug 2024 17:04:56 -0600 Subject: [PATCH] Implement `SnapEnvConfigManager` (WIP) --- dev-requirements.txt | 2 +- lib/charms/hpc_libs/v0/slurm_ops.py | 187 +++++++++++++++++++--------- requirements.txt | 2 + tests/unit/test_slurm_ops.py | 6 +- tox.ini | 1 + 5 files changed, 134 insertions(+), 64 deletions(-) diff --git a/dev-requirements.txt b/dev-requirements.txt index 5960b53..214054c 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,2 +1,2 @@ pytest ~= 7.2 -pytest-order ~= 1.1 \ No newline at end of file +pytest-order ~= 1.1 diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index 59487d4..f769efd 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -75,8 +75,10 @@ def _on_install(self, _) -> None: from abc import ABC, abstractmethod from collections.abc import Mapping from enum import Enum +from pathlib import Path from typing import Any, Optional, Type +import dotenv import yaml # The unique Charmhub library identifier, never change it @@ -90,13 +92,38 @@ def _on_install(self, _) -> None: LIBPATCH = 6 # Charm library dependencies to fetch during `charmcraft pack`. -PYDEPS = ["pyyaml>=6.0.1"] +PYDEPS = ["pyyaml>=6.0.1", "python-dotenv~=1.0.1"] _logger = logging.getLogger(__name__) _acronym = re.compile(r"(?<=[A-Z])(?=[A-Z][a-z])") _kebabize = re.compile(r"(?<=[a-z0-9])(?=[A-Z])") +def _call(cmd: str, *args: str, stdin: Optional[str] = None) -> str: + """Call a command with logging. + + Raises: + SlurmOpsError: Raised if the command fails. + """ + cmd = [cmd, *args] + _logger.debug(f"Executing command {cmd}") + try: + return subprocess.check_output(cmd, input=stdin, stderr=subprocess.PIPE, text=True).strip() + except subprocess.CalledProcessError as e: + _logger.error(f"`{' '.join(cmd)}` failed") + _logger.error(f"stderr: {e.stderr.decode()}") + raise SlurmOpsError(f"command {cmd[0]} failed. reason:\n{e.stderr.decode()}") + + +def _snap(*args) -> str: + """Control snap by via executed `snap ...` commands. + + Raises: + subprocess.CalledProcessError: Raised if snap command fails. + """ + return _call("snap", *args) + + def format_key(key: str) -> str: """Format Slurm configuration keys from SlurmCASe into kebab case. @@ -240,15 +267,13 @@ def install(self) -> None: def version(self) -> str: """Get the current version of Slurm installed on the system.""" - @property @abstractmethod - def service_manager(self) -> Type[ServiceManager]: - """Get the `ServiceManager` class of this ops manager.""" + def service_manager_for(self, type: ServiceType) -> Type[ServiceManager]: + """Return the `ServiceManager` for the specified `ServiceType`.""" - @property @abstractmethod - def config_manager(self) -> Type[ConfigManager]: - """Get the `ConfigurationManager` class of this ops manager.""" + def config_manager_for(self, type: ServiceType) -> Type[ConfigManager]: + """Return the `ConfigManager` for the specified `ServiceType`.""" @property @abstractmethod @@ -260,8 +285,8 @@ class MungeManager: """Manage `munged` service operations.""" def __init__(self, ops_manager: SlurmOpsManager) -> None: - self.service = ops_manager.service_manager(ServiceType.MUNGED) - self.config = ops_manager.config_manager(ServiceType.MUNGED) + self.service = ops_manager.service_manager_for(ServiceType.MUNGED) + self.config = ops_manager.config_manager_for(ServiceType.MUNGED) self.key = ops_manager.munge_key_manager() @@ -269,7 +294,7 @@ class PrometheusExporterManager: """Manage `slurm-prometheus-exporter` service operations.""" def __init__(self, ops_manager: SlurmOpsManager) -> None: - self.service = ops_manager.service_manager(ServiceType.PROMETHEUS_EXPORTER) + self.service = ops_manager.service_manager_for(ServiceType.PROMETHEUS_EXPORTER) class SlurmManagerBase: @@ -279,8 +304,8 @@ def __init__(self, service: ServiceType, snap: bool = False) -> None: if not snap: raise SlurmOpsError("deb packaging is currently unimplemented") ops_manager = SnapManager() - self.service = ops_manager.service_manager(service) - self.config = ops_manager.config_manager(service) + self.service = ops_manager.service_manager_for(service) + self.config = ops_manager.config_manager_for(service) self.munge = MungeManager(ops_manager) self.exporter = PrometheusExporterManager(ops_manager) self.install = ops_manager.install @@ -292,37 +317,6 @@ def hostname(self) -> str: return socket.gethostname().split(".")[0] -class SnapManager(SlurmOpsManager): - """Slurm ops manager that uses Snap as its package manager.""" - - def install(self) -> None: - """Install Slurm using the `slurm` snap.""" - # FIXME: Pin slurm to the stable channel - _snap("install", "slurm", "--channel", "latest/candidate", "--classic") - - def version(self) -> str: - """Get the current version of the `slurm` snap installed on the system.""" - info = yaml.safe_load(_snap("info", "slurm")) - if (ver := info.get("installed")) is None: - raise SlurmOpsError("unable to retrive snap info. ensure slurm is correctly installed") - return ver.split(maxsplit=1)[0] - - @property - def service_manager(self) -> Type[ServiceManager]: - """Get the `ServiceManager` class of `SnapManager`.""" - return _SnapServiceManager - - @property - def config_manager(self) -> Type[ConfigManager]: - """Get the `ConfigurationManager` class of this ops manager.""" - return _SnapConfigManager - - @property - def munge_key_manager(self) -> Type[MungeKeyManager]: - """Get the `MungekeyManager` class of this ops manager.""" - return _SnapMungeKeyManager - - class _SnapServiceManager(ServiceManager): """Control a Slurm service.""" @@ -358,6 +352,58 @@ def type(self) -> ServiceType: return self._service +class _SnapEnvConfigManager(ConfigManager): + """Control configuration of a Slurm Snap component that uses environment variables to manage settings.""" + + _DOTENV_FILE = Path("/var/snap/slurm/common/.env") + + def __init__(self, prefix: str, keys: [str]) -> None: + self._name: str = prefix + self._keys: [str] = keys + + def config_to_env_var(self, key: str) -> str: + """Get the environment variable corresponding to the configuration `key`.""" + return self._name.replace("-", "_").upper() + "_" + key.replace("-", "_").upper() + + def get(self, key: Optional[str] = None) -> Any: + """Get specific configuration value for Slurm component.""" + if not key: + return { + k: value + for k in self._keys + if (value := dotenv.get_key(self._DOTENV_FILE, self.config_to_env_var(k))) + } + + if key not in self._keys: + raise SlurmOpsError(f"invalid configuration key `{key}` for service `{self._name}`") + + return dotenv.get_key(self._DOTENV_FILE, self.config_to_env_var(key)) + + def set(self, config: Mapping[str, Any]) -> None: + """Set configuration for Slurm component.""" + # Sanity check to avoid modifying the .env if the input keys are invalid. + for key in config.keys(): + if key not in self._keys: + raise SlurmOpsError( + f"invalid configuration key `{key}` for service `{self._name}`" + ) + + for key, value in config.values(): + dotenv.set_key(self._DOTENV_FILE, self.config_to_env_var(key), str(value)) + + def unset(self, *keys: str) -> None: + """Unset configuration for Slurm component.""" + # Sanity check to avoid modifying the .env if the input keys are invalid. + for key in keys: + if key not in self._keys: + raise SlurmOpsError( + f"Invalid configuration key `{key}` for service `{self._name}`" + ) + + for key in keys: + dotenv.unset_key(self._DOTENV_FILE, self.config_to_env_var(key)) + + class _SnapConfigManager(ConfigManager): """Control configuration of a Slurm component using Snap.""" @@ -417,26 +463,43 @@ def generate(self) -> None: self._mungectl("key", "generate") -def _call(cmd: str, *args: str, stdin: Optional[str] = None) -> str: - """Call a command with logging. +class SnapManager(SlurmOpsManager): + """Slurm ops manager that uses Snap as its package manager.""" - Raises: - SlurmOpsError: Raised if the command fails. - """ - cmd = [cmd, *args] - _logger.debug(f"Executing command {cmd}") - try: - return subprocess.check_output(cmd, input=stdin, stderr=subprocess.PIPE, text=True).strip() - except subprocess.CalledProcessError as e: - _logger.error(f"`{' '.join(cmd)}` failed") - _logger.error(f"stderr: {e.stderr.decode()}") - raise SlurmOpsError(f"command {cmd[0]} failed. reason:\n{e.stderr.decode()}") + _CONFIG_SERVICES = { + ServiceType.SLURMCTLD: _SnapConfigManager(ServiceType.SLURMCTLD), + ServiceType.SLURMDBD: _SnapConfigManager(ServiceType.SLURMDBD), + ServiceType.MUNGED: _SnapEnvConfigManager(ServiceType.MUNGED.value, ["max-thread-count"]), + ServiceType.SLURMD: _SnapEnvConfigManager(ServiceType.SLURMD.value, ["config-server"]), + ServiceType.PROMETHEUS_EXPORTER: _SnapEnvConfigManager( + ServiceType.PROMETHEUS_EXPORTER.value, [] + ), + ServiceType.SLURMRESTD: _SnapEnvConfigManager( + ServiceType.SLURMRESTD.value, ["max-connections", "max-thread-count"] + ), + } + def install(self) -> None: + """Install Slurm using the `slurm` snap.""" + # FIXME: Pin slurm to the stable channel + _snap("install", "slurm", "--channel", "latest/candidate", "--classic") -def _snap(*args) -> str: - """Control snap by via executed `snap ...` commands. + def version(self) -> str: + """Get the current version of the `slurm` snap installed on the system.""" + info = yaml.safe_load(_snap("info", "slurm")) + if (ver := info.get("installed")) is None: + raise SlurmOpsError("unable to retrive snap info. ensure slurm is correctly installed") + return ver.split(maxsplit=1)[0] - Raises: - subprocess.CalledProcessError: Raised if snap command fails. - """ - return _call("snap", *args) + def service_manager_for(self, type: ServiceType) -> ServiceManager: + """Return the `ServiceManager` for the specified `ServiceType`.""" + return _SnapServiceManager(type) + + def config_manager_for(self, type: ServiceType) -> Type[ConfigManager]: + """Return the `ConfigurationManager` for the specified `ServiceType`.""" + return self._CONFIG_SERVICES[type] + + @property + def munge_key_manager(self) -> Type[MungeKeyManager]: + """Get the `MungekeyManager` class of this ops manager.""" + return _SnapMungeKeyManager diff --git a/requirements.txt b/requirements.txt index 618ba75..7649a8a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,3 @@ ops ~= 2.5 +slurmutils ~= 0.4.0 +python-dotenv ~= 1.0.1 diff --git a/tests/unit/test_slurm_ops.py b/tests/unit/test_slurm_ops.py index 0086b02..8d94e6a 100644 --- a/tests/unit/test_slurm_ops.py +++ b/tests/unit/test_slurm_ops.py @@ -7,6 +7,7 @@ import base64 import subprocess from unittest import TestCase +from pyfakefs.fake_filesystem_unittest import TestCase as FsTestCase from unittest.mock import patch import charms.hpc_libs.v0.slurm_ops as slurm @@ -114,6 +115,9 @@ def test_call_error(self, subcmd) -> None: @patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output") class SlurmOpsBase: """Test the Slurm service operations managers.""" + def setUp(self): + self.setUpPyfakefs() + self.fs.create_file("/var/snap/slurm/common/.env") def test_config_name(self, *_) -> None: """Test that the config name is correctly set.""" @@ -249,7 +253,7 @@ def test_hostname(self, gethostname, *_) -> None: cls_name = f"Test{manager.service.type.name.capitalize()}Ops" globals()[cls_name] = type( cls_name, - (SlurmOpsBase, TestCase), + (SlurmOpsBase, FsTestCase), { "manager": manager, "config_name": config_name, diff --git a/tox.ini b/tox.ini index 801d23e..3bac066 100644 --- a/tox.ini +++ b/tox.ini @@ -50,6 +50,7 @@ commands = description = Run unit tests deps = pytest + pyfakefs coverage[toml] -r {tox_root}/requirements.txt commands =