Skip to content

Commit

Permalink
Implement SnapEnvConfigManager (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
jedel1043 committed Aug 22, 2024
1 parent b44402d commit e76c8e3
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 64 deletions.
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
pytest ~= 7.2
pytest-order ~= 1.1
pytest-order ~= 1.1
187 changes: 125 additions & 62 deletions lib/charms/hpc_libs/v0/slurm_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,10 @@ def _on_install(self, _) -> None:
from abc import ABC, abstractmethod
from collections.abc import Mapping
from enum import Enum
from pathlib import Path
from typing import Any, Optional, Type

import dotenv
import yaml

# The unique Charmhub library identifier, never change it
Expand All @@ -90,13 +92,38 @@ def _on_install(self, _) -> None:
LIBPATCH = 6

# Charm library dependencies to fetch during `charmcraft pack`.
PYDEPS = ["pyyaml>=6.0.1"]
PYDEPS = ["pyyaml>=6.0.1", "python-dotenv~=1.0.1"]

_logger = logging.getLogger(__name__)
_acronym = re.compile(r"(?<=[A-Z])(?=[A-Z][a-z])")
_kebabize = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")


def _call(cmd: str, *args: str, stdin: Optional[str] = None) -> str:
"""Call a command with logging.
Raises:
SlurmOpsError: Raised if the command fails.
"""
cmd = [cmd, *args]
_logger.debug(f"Executing command {cmd}")
try:
return subprocess.check_output(cmd, input=stdin, stderr=subprocess.PIPE, text=True).strip()
except subprocess.CalledProcessError as e:
_logger.error(f"`{' '.join(cmd)}` failed")
_logger.error(f"stderr: {e.stderr.decode()}")
raise SlurmOpsError(f"command {cmd[0]} failed. reason:\n{e.stderr.decode()}")


def _snap(*args) -> str:
"""Control snap by via executed `snap ...` commands.
Raises:
subprocess.CalledProcessError: Raised if snap command fails.
"""
return _call("snap", *args)


def format_key(key: str) -> str:
"""Format Slurm configuration keys from SlurmCASe into kebab case.
Expand Down Expand Up @@ -240,15 +267,13 @@ def install(self) -> None:
def version(self) -> str:
"""Get the current version of Slurm installed on the system."""

@property
@abstractmethod
def service_manager(self) -> Type[ServiceManager]:
"""Get the `ServiceManager` class of this ops manager."""
def service_manager_for(self, type: ServiceType) -> Type[ServiceManager]:
"""Return the `ServiceManager` for the specified `ServiceType`."""

@property
@abstractmethod
def config_manager(self) -> Type[ConfigManager]:
"""Get the `ConfigurationManager` class of this ops manager."""
def config_manager_for(self, type: ServiceType) -> Type[ConfigManager]:
"""Return the `ConfigManager` for the specified `ServiceType`."""

@property
@abstractmethod
Expand All @@ -260,16 +285,16 @@ class MungeManager:
"""Manage `munged` service operations."""

def __init__(self, ops_manager: SlurmOpsManager) -> None:
self.service = ops_manager.service_manager(ServiceType.MUNGED)
self.config = ops_manager.config_manager(ServiceType.MUNGED)
self.service = ops_manager.service_manager_for(ServiceType.MUNGED)
self.config = ops_manager.config_manager_for(ServiceType.MUNGED)
self.key = ops_manager.munge_key_manager()


class PrometheusExporterManager:
"""Manage `slurm-prometheus-exporter` service operations."""

def __init__(self, ops_manager: SlurmOpsManager) -> None:
self.service = ops_manager.service_manager(ServiceType.PROMETHEUS_EXPORTER)
self.service = ops_manager.service_manager_for(ServiceType.PROMETHEUS_EXPORTER)


class SlurmManagerBase:
Expand All @@ -279,8 +304,8 @@ def __init__(self, service: ServiceType, snap: bool = False) -> None:
if not snap:
raise SlurmOpsError("deb packaging is currently unimplemented")
ops_manager = SnapManager()
self.service = ops_manager.service_manager(service)
self.config = ops_manager.config_manager(service)
self.service = ops_manager.service_manager_for(service)
self.config = ops_manager.config_manager_for(service)
self.munge = MungeManager(ops_manager)
self.exporter = PrometheusExporterManager(ops_manager)
self.install = ops_manager.install
Expand All @@ -292,37 +317,6 @@ def hostname(self) -> str:
return socket.gethostname().split(".")[0]


class SnapManager(SlurmOpsManager):
"""Slurm ops manager that uses Snap as its package manager."""

def install(self) -> None:
"""Install Slurm using the `slurm` snap."""
# FIXME: Pin slurm to the stable channel
_snap("install", "slurm", "--channel", "latest/candidate", "--classic")

def version(self) -> str:
"""Get the current version of the `slurm` snap installed on the system."""
info = yaml.safe_load(_snap("info", "slurm"))
if (ver := info.get("installed")) is None:
raise SlurmOpsError("unable to retrive snap info. ensure slurm is correctly installed")
return ver.split(maxsplit=1)[0]

@property
def service_manager(self) -> Type[ServiceManager]:
"""Get the `ServiceManager` class of `SnapManager`."""
return _SnapServiceManager

@property
def config_manager(self) -> Type[ConfigManager]:
"""Get the `ConfigurationManager` class of this ops manager."""
return _SnapConfigManager

@property
def munge_key_manager(self) -> Type[MungeKeyManager]:
"""Get the `MungekeyManager` class of this ops manager."""
return _SnapMungeKeyManager


class _SnapServiceManager(ServiceManager):
"""Control a Slurm service."""

Expand Down Expand Up @@ -358,6 +352,58 @@ def type(self) -> ServiceType:
return self._service


class _SnapEnvConfigManager(ConfigManager):
"""Control configuration of a Slurm Snap component that uses environment variables to manage settings."""

_DOTENV_FILE = Path("/var/snap/slurm/common/.env")

def __init__(self, prefix: str, keys: [str]) -> None:
self._name: str = prefix
self._keys: [str] = keys

def config_to_env_var(self, key: str) -> str:
"""Get the environment variable corresponding to the configuration `key`."""
return self._name.replace("-", "_").upper() + "_" + key.replace("-", "_").upper()

def get(self, key: Optional[str] = None) -> Any:
"""Get specific configuration value for Slurm component."""
if not key:
return {
k: value
for k in self._keys
if (value := dotenv.get_key(self._DOTENV_FILE, self.config_to_env_var(k)))
}

if key not in self._keys:
raise SlurmOpsError(f"invalid configuration key `{key}` for service `{self._name}`")

return dotenv.get_key(self._DOTENV_FILE, self.config_to_env_var(key))

def set(self, config: Mapping[str, Any]) -> None:
"""Set configuration for Slurm component."""
# Sanity check to avoid modifying the .env if the input keys are invalid.

Check failure on line 384 in lib/charms/hpc_libs/v0/slurm_ops.py

View workflow job for this annotation

GitHub Actions / Inclusive naming check

`Sanity` may be insensitive, use `confidence`, `quick check`, `coherence check` instead
for key in config.keys():
if key not in self._keys:
raise SlurmOpsError(
f"invalid configuration key `{key}` for service `{self._name}`"
)

for key, value in config.values():
dotenv.set_key(self._DOTENV_FILE, self.config_to_env_var(key), str(value))

def unset(self, *keys: str) -> None:
"""Unset configuration for Slurm component."""
# Sanity check to avoid modifying the .env if the input keys are invalid.

Check failure on line 396 in lib/charms/hpc_libs/v0/slurm_ops.py

View workflow job for this annotation

GitHub Actions / Inclusive naming check

`Sanity` may be insensitive, use `confidence`, `quick check`, `coherence check` instead
for key in keys:
if key not in self._keys:
raise SlurmOpsError(
f"Invalid configuration key `{key}` for service `{self._name}`"
)

for key in keys:
dotenv.unset_key(self._DOTENV_FILE, self.config_to_env_var(key))


class _SnapConfigManager(ConfigManager):
"""Control configuration of a Slurm component using Snap."""

Expand Down Expand Up @@ -417,26 +463,43 @@ def generate(self) -> None:
self._mungectl("key", "generate")


def _call(cmd: str, *args: str, stdin: Optional[str] = None) -> str:
"""Call a command with logging.
class SnapManager(SlurmOpsManager):
"""Slurm ops manager that uses Snap as its package manager."""

Raises:
SlurmOpsError: Raised if the command fails.
"""
cmd = [cmd, *args]
_logger.debug(f"Executing command {cmd}")
try:
return subprocess.check_output(cmd, input=stdin, stderr=subprocess.PIPE, text=True).strip()
except subprocess.CalledProcessError as e:
_logger.error(f"`{' '.join(cmd)}` failed")
_logger.error(f"stderr: {e.stderr.decode()}")
raise SlurmOpsError(f"command {cmd[0]} failed. reason:\n{e.stderr.decode()}")
_CONFIG_SERVICES = {
ServiceType.SLURMCTLD: _SnapConfigManager(ServiceType.SLURMCTLD),
ServiceType.SLURMDBD: _SnapConfigManager(ServiceType.SLURMDBD),
ServiceType.MUNGED: _SnapEnvConfigManager(ServiceType.MUNGED.value, ["max-thread-count"]),
ServiceType.SLURMD: _SnapEnvConfigManager(ServiceType.SLURMD.value, ["config-server"]),
ServiceType.PROMETHEUS_EXPORTER: _SnapEnvConfigManager(
ServiceType.PROMETHEUS_EXPORTER.value, []
),
ServiceType.SLURMRESTD: _SnapEnvConfigManager(
ServiceType.SLURMRESTD.value, ["max-connections", "max-thread-count"]
),
}

def install(self) -> None:
"""Install Slurm using the `slurm` snap."""
# FIXME: Pin slurm to the stable channel
_snap("install", "slurm", "--channel", "latest/candidate", "--classic")

def _snap(*args) -> str:
"""Control snap by via executed `snap ...` commands.
def version(self) -> str:
"""Get the current version of the `slurm` snap installed on the system."""
info = yaml.safe_load(_snap("info", "slurm"))
if (ver := info.get("installed")) is None:
raise SlurmOpsError("unable to retrive snap info. ensure slurm is correctly installed")
return ver.split(maxsplit=1)[0]

Raises:
subprocess.CalledProcessError: Raised if snap command fails.
"""
return _call("snap", *args)
def service_manager_for(self, type: ServiceType) -> ServiceManager:
"""Return the `ServiceManager` for the specified `ServiceType`."""
return _SnapServiceManager(type)

def config_manager_for(self, type: ServiceType) -> Type[ConfigManager]:
"""Return the `ConfigurationManager` for the specified `ServiceType`."""
return self._CONFIG_SERVICES[type]

@property
def munge_key_manager(self) -> Type[MungeKeyManager]:
"""Get the `MungekeyManager` class of this ops manager."""
return _SnapMungeKeyManager
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
ops ~= 2.5
slurmutils ~= 0.4.0
python-dotenv ~= 1.0.1
6 changes: 5 additions & 1 deletion tests/unit/test_slurm_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import base64
import subprocess
from unittest import TestCase
from pyfakefs.fake_filesystem_unittest import TestCase as FsTestCase
from unittest.mock import patch

import charms.hpc_libs.v0.slurm_ops as slurm
Expand Down Expand Up @@ -114,6 +115,9 @@ def test_call_error(self, subcmd) -> None:
@patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output")
class SlurmOpsBase:
"""Test the Slurm service operations managers."""
def setUp(self):
self.setUpPyfakefs()
self.fs.create_file("/var/snap/slurm/common/.env")

def test_config_name(self, *_) -> None:
"""Test that the config name is correctly set."""
Expand Down Expand Up @@ -249,7 +253,7 @@ def test_hostname(self, gethostname, *_) -> None:
cls_name = f"Test{manager.service.type.name.capitalize()}Ops"
globals()[cls_name] = type(
cls_name,
(SlurmOpsBase, TestCase),
(SlurmOpsBase, FsTestCase),
{
"manager": manager,
"config_name": config_name,
Expand Down
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ commands =
description = Run unit tests
deps =
pytest
pyfakefs
coverage[toml]
-r {tox_root}/requirements.txt
commands =
Expand Down

0 comments on commit e76c8e3

Please sign in to comment.