Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: use composition and inheritance to build service managers #8

Merged
merged 8 commits into from
Jul 10, 2024
268 changes: 164 additions & 104 deletions lib/charms/hpc_libs/v0/slurm_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,55 +13,66 @@
# limitations under the License.


"""Library to manage the Slurm snap.
"""Abstractions for managing Slurm operations via snap.

This library contains the `SlurmManager` class, which offers interfaces to use and manage
the Slurm snap inside charms.
This library contains the `SlurmManagerBase` and `ServiceType` classes,
which provide high-level interfaces for managing Slurm within charmed operators.

### General usage
### Example Usage

For starters, the `SlurmManager` constructor receives a `Service` enum as a parameter, which
helps the manager determine things like the correct service to enable, or the correct settings
key to mutate.
#### Managing a Slurm service

The `SlurmManagerBase` constructor receives a `ServiceType` enum. The enum instructs
the inheriting Slurm service manager how to manage its corresponding Slurm service on the host.

```python3
import charms.hpc_libs.v0.slurm_ops as slurm
from charms.hpc_libs.v0.slurm_ops import SlurmManagerBase, ServiceType

class SlurmctldManager(SlurmManagerBase):
# Manage `slurmctld` service on host.

def __init__(self) -> None:
super().__init__(ServiceType.SLURMCTLD)

```
from charms.hpc_libs.v0.slurm_ops import (
Service,
SlurmManager,
)

class ApplicationCharm(CharmBase):
# Application charm that needs to use the Slurm snap.

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)

# Observe the charm events used to manage the Slurm service.
self._slurm_manager = SlurmManager(Service.SLURMCTLD)
self._slurm_manager = SlurmctldManager()
self.framework.observe(
self.on.install,
self._on_install,
)

def _on_install(self, _) -> None:
self._slurm_manager.install()
self.unit.set_workload_version(self._slurm_manager.version())
self._slurm_manager.set_config("cluster-name", "cluster")
slurm.install()
self.unit.set_workload_version(slurm.version())
self._slurm_manager.config.set({"cluster-name": "cluster"})
```
"""

import base64
import enum
import functools
__all__ = [
"format_key",
"install",
"version",
"ServiceType",
"SlurmManagerBase",
]

import json
import logging
import os
import re
import subprocess
import tempfile
from collections.abc import Mapping
from enum import Enum
from typing import Any, Optional

import yaml

_logger = logging.getLogger(__name__)

# The unique Charmhub library identifier, never change it
LIBID = "541fd767f90b40539cf7cd6e7db8fabf"

Expand All @@ -72,11 +83,49 @@ def _on_install(self, _) -> None:
# to 0 if you are raising the major API version
LIBPATCH = 1


# Charm library dependencies to fetch during `charmcraft pack`.
PYDEPS = ["pyyaml>=6.0.1"]

_logger = logging.getLogger(__name__)
_acronym = re.compile(r"(?<=[A-Z])(?=[A-Z][a-z])")
_kebabize = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")


def format_key(key: str) -> str:
"""Format Slurm configuration keys from SlurmCASe into kebab case.

def _call(cmd: str, *args: [str]) -> bytes:
Args:
key: Slurm configuration key to convert to kebab case.

Notes:
Slurm configuration syntax does not follow proper PascalCasing
format, so we cannot put keys directly through a kebab case converter
to get the desired format. Some additional processing is needed for
certain keys before the key can be properly kebabized.

For example, without additional preprocessing, the key `CPUs` will
become `cp-us` if put through a kebabizer without being preformatted to `Cpus`.
NucciTheBoss marked this conversation as resolved.
Show resolved Hide resolved
"""
if "CPUs" in key:
key = key.replace("CPUs", "Cpus")
key = _acronym.sub(r"-", key)
return _kebabize.sub(r"-", key).lower()


def install() -> None:
    """Install the `slurm` snap.

    Runs `snap install slurm` against the `latest/candidate` channel with
    the `--classic` flag.
    """
    # FIXME: Pin slurm to the stable channel
    snap_args = ("install", "slurm", "--channel", "latest/candidate", "--classic")
    _snap(*snap_args)


def version() -> str:
    """Get the current version of Slurm installed on the system.

    Returns:
        The first whitespace-separated token of the `installed` field
        reported by `snap info slurm`.
    """
    snap_info = yaml.safe_load(_snap("info", "slurm"))
    installed: str = snap_info["installed"]
    # Only the leading token is the version; anything after it is discarded.
    return installed.split(maxsplit=1)[0]


def _call(cmd: str, *args: str, stdin: Optional[str] = None) -> str:
"""Call a command with logging.

Raises:
Expand All @@ -85,7 +134,7 @@ def _call(cmd: str, *args: [str]) -> bytes:
cmd = [cmd, *args]
_logger.debug(f"Executing command {cmd}")
try:
return subprocess.check_output(cmd, stderr=subprocess.PIPE, text=False)
return subprocess.check_output(cmd, input=stdin, stderr=subprocess.PIPE, text=True).strip()
except subprocess.CalledProcessError as e:
_logger.error(f"`{' '.join(cmd)}` failed")
_logger.error(f"stderr: {e.stderr.decode()}")
Expand All @@ -98,16 +147,26 @@ def _snap(*args) -> str:
Raises:
subprocess.CalledProcessError: Raised if snap command fails.
"""
return _call("snap", *args).decode()
return _call("snap", *args)


_get_config = functools.partial(_snap, "get", "slurm")
_set_config = functools.partial(_snap, "set", "slurm")
def _mungectl(*args: str, stdin: Optional[str] = None) -> str:
"""Control munge via `slurm.mungectl ...`.

Args:
*args: Arguments to pass to `mungectl`.
stdin: Input to pass to `mungectl` via stdin.

class Service(enum.Enum):
"""Type of Slurm service that will be managed by `SlurmManager`."""
Raises:
subprocess.CalledProcessError: Raised if `mungectl` command fails.
"""
return _call("slurm.mungectl", *args, stdin=stdin)


class ServiceType(Enum):
"""Type of Slurm service to manage."""

MUNGED = "munged"
SLURMD = "slurmd"
SLURMCTLD = "slurmctld"
SLURMDBD = "slurmdbd"
Expand All @@ -116,94 +175,95 @@ class Service(enum.Enum):
@property
def config_name(self) -> str:
"""Configuration name on the slurm snap for this service type."""
if self is Service.SLURMCTLD:
if self is ServiceType.SLURMCTLD:
return "slurm"
if self is ServiceType.MUNGED:
return "munge"

return self.value


class SlurmManager:
"""Slurm snap manager.
class ServiceManager:
"""Control a Slurm service."""

This class offers methods to manage the Slurm snap for a certain service type.
The list of available services is specified by the `Service` enum.
"""
def enable(self) -> None:
"""Enable service."""
_snap("start", "--enable", f"slurm.{self._service.value}")

def disable(self) -> None:
"""Disable service."""
_snap("stop", "--disable", f"slurm.{self._service.value}")

def restart(self) -> None:
"""Restart service."""
_snap("restart", f"slurm.{self._service.value}")


class ConfigurationManager:
"""Control configuration of a Slurm component."""

def __init__(self, service: Service):
def __init__(self, service: ServiceType) -> None:
self._service = service

def install(self):
"""Install the slurm snap in this system."""
# TODO: Pin slurm to the stable channel
_snap("install", "slurm", "--channel", "latest/candidate", "--classic")
def get_options(self, *keys: str) -> Mapping[str, Any]:
"""Get given configurations values for Slurm component."""
configs = {}
for key in keys:
config = self.get(key)
target = key.rsplit(".", maxsplit=1)[-1]
configs[target] = config

def enable(self):
"""Start and enable the managed slurm service and the munged service."""
_snap("start", "--enable", "slurm.munged")
_snap("start", "--enable", f"slurm.{self._service.value}")
return configs

def restart(self):
"""Restart the managed slurm service."""
_snap("restart", f"slurm.{self._service.value}")
def get(self, key: str) -> Any:
"""Get specific configuration value for Slurm component."""
key = f"{self._service.config_name}.{key}"
config = json.loads(_snap("get", "-d", "slurm", key))
return config[key]

def restart_munged(self):
"""Restart the munged service."""
_snap("restart", "slurm.munged")
def set(self, config: Mapping[str, Any]) -> None:
"""Set configuration for Slurm component."""
args = [f"{self._service.config_name}.{k}={json.dumps(v)}" for k, v in config.items()]
_snap("set", "slurm", *args)

def disable(self):
"""Disable the managed slurm service and the munged service."""
_snap("stop", "--disable", "slurm.munged")
_snap("stop", "--disable", f"slurm.{self._service.value}")
def unset(self, *keys) -> None:
"""Unset configuration for Slurm component."""
args = [f"{self._service.config_name}.{k}!" for k in keys]
_snap("unset", "slurm", *args)

def set_config(self, key: str, value: str):
"""Set a snap config for the managed slurm service.

See the configuration section from the [Slurm readme](https://github.com/charmed-hpc/slurm-snap#configuration)
for a list of all the available configurations.
class MungeManager(ServiceManager):
"""Manage `munged` service operations."""

Note that this will only allow configuring the settings that are exclusive to
the specific managed service. (the slurmctld service uses the slurm parent key)
"""
_set_config(f"{self._service.config_name}.{key}={value}")
def __init__(self) -> None:
self._service = ServiceType.MUNGED
self.config = ConfigurationManager(ServiceType.MUNGED)

def get_key(self) -> str:
"""Get the current munge key.

def get_config(self, key: str) -> str:
"""Get a snap config for the managed slurm service.
Returns:
The current munge key as a base64-encoded string.
"""
return _mungectl("key", "get")

See the configuration section from the [Slurm readme](https://github.com/charmed-hpc/slurm-snap#configuration)
for a list of all the available configurations.
def set_key(self, key: str) -> None:
"""Set a new munge key.

Note that this will only allow fetching the settings that are exclusive to
the specific managed service. (the slurmctld service uses the slurm parent key)
Args:
key: A new, base64-encoded munge key.
"""
# Snap returns the config value with an additional newline at the end.
return _get_config(f"{self._service.config_name}.{key}").strip()

def generate_munge_key(self) -> bytes:
"""Generate a new cryptographically secure munged key."""
handle, path = tempfile.mkstemp()
try:
_call("mungekey", "-f", "-k", path)
os.close(handle)
with open(path, "rb") as f:
return f.read()
finally:
os.remove(path)

def set_munge_key(self, key: bytes):
"""Set the current munged key."""
# TODO: use `slurm.setmungekey` when implemented
# subprocess.run(["slurm.setmungekey"], stdin=key)
key = base64.b64encode(key).decode()
_set_config(f"munge.key={key}")

def get_munge_key(self) -> bytes:
"""Get the current munged key."""
# TODO: use `slurm.setmungekey` when implemented
# key = subprocess.run(["slurm.getmungekey"])
key = _get_config("munge.key")
return base64.b64decode(key)

def version(self) -> str:
"""Get the installed Slurm version of the snap."""
info = yaml.safe_load(_snap("info", "slurm"))
version: str = info["installed"]
return version.split(maxsplit=1)[0]
_mungectl("key", "set", stdin=key)

def generate_key(self) -> None:
"""Generate a new, cryptographically secure munge key."""
_mungectl("key", "generate")


class SlurmManagerBase(ServiceManager):
    """Base manager for Slurm services.

    Combines service control (inherited from `ServiceManager`) with a
    `ConfigurationManager` for the given service and a `MungeManager`.
    """

    def __init__(self, service: ServiceType) -> None:
        """Initialize the manager for the given Slurm service type.

        Args:
            service: Type of Slurm service this manager controls.
        """
        # Manager for the `munged` service.
        self.munge = MungeManager()
        # Configuration manager bound to this manager's service type.
        self.config = ConfigurationManager(service)
        # Read by the inherited enable/disable/restart methods.
        self._service = service
Loading