Skip to content

Commit

Permalink
feat: implement AptManager for slurm_ops
Browse files Browse the repository at this point in the history
Modifies the integration tests to also test AptManager.
  • Loading branch information
jedel1043 committed Sep 12, 2024
1 parent 4e5e870 commit b577b79
Show file tree
Hide file tree
Showing 6 changed files with 1,617 additions and 29 deletions.
1 change: 1 addition & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
slurmutils ~= 0.6.0
python-dotenv ~= 1.0.1
pyyaml >= 6.0.2
distro ~=1.9.0

# tests deps
coverage[toml] ~= 7.6
Expand Down
238 changes: 220 additions & 18 deletions lib/charms/hpc_libs/v0/slurm_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
This library contains manager classes that provide high-level interfaces
for managing Slurm operations within charmed operators.
### Note
This charm library depends on the `charms.operator_libs_linux.v0.apt` charm library, which can
be imported by running `charmcraft fetch-lib charms.operator_libs_linux.v0.apt`.
### Example Usage
#### Managing the `slurmctld` service
Expand Down Expand Up @@ -64,17 +69,34 @@ def _on_install(self, _) -> None:
import os
import socket
import subprocess
import textwrap
from abc import ABC, abstractmethod
from collections.abc import Mapping
from contextlib import contextmanager
from enum import Enum
from pathlib import Path
from typing import Any, Optional, Union

import distro
import dotenv
import yaml
from slurmutils.editors import slurmconfig, slurmdbdconfig


class SlurmOpsError(Exception):
    """Exception raised when a slurm operation failed."""

    @property
    def message(self) -> str:
        """Return message passed as argument to exception.

        Returns:
            The first positional argument the exception was created with, or an
            empty string if the exception was created without any arguments.
        """
        # `args` is an empty tuple for `SlurmOpsError()`; indexing it blindly
        # would raise an unrelated IndexError while handling the real failure.
        return self.args[0] if self.args else ""


# The apt charm library is fetched separately (`charmcraft fetch-lib`), so it may
# be missing at import time. Surface that as a SlurmOpsError, and chain the
# original ImportError so the traceback still shows the root cause.
try:
    import charms.operator_libs_linux.v0.apt as apt
except ImportError as e:
    raise SlurmOpsError(f"could not import apt charm library. reason: {e}") from e

# The unique Charmhub library identifier, never change it
LIBID = "541fd767f90b40539cf7cd6e7db8fabf"

Expand All @@ -86,7 +108,7 @@ def _on_install(self, _) -> None:
LIBPATCH = 6

# Charm library dependencies to fetch during `charmcraft pack`.
PYDEPS = ["pyyaml>=6.0.2", "python-dotenv~=1.0.1", "slurmutils~=0.6.0"]
PYDEPS = ["pyyaml>=6.0.2", "python-dotenv~=1.0.1", "slurmutils~=0.6.0", "distro~=1.9.0"]

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -116,15 +138,6 @@ def _snap(*args) -> str:
return _call("snap", *args)


class SlurmOpsError(Exception):
    """Exception raised when a slurm operation failed."""

    @property
    def message(self) -> str:
        """Return message passed as argument to exception.

        Returns:
            The first positional argument the exception was created with, or an
            empty string if the exception was created without any arguments.
        """
        # `args` is an empty tuple for `SlurmOpsError()`; indexing it blindly
        # would raise an unrelated IndexError while handling the real failure.
        return self.args[0] if self.args else ""


class ServiceType(Enum):
"""Type of Slurm service to manage."""

Expand Down Expand Up @@ -274,9 +287,7 @@ class SlurmManagerBase:
"""Base manager for Slurm services."""

def __init__(self, service: ServiceType, snap: bool = False) -> None:
if not snap:
raise SlurmOpsError("deb packaging is currently unimplemented")
self._ops_manager = SnapManager()
self._ops_manager = SnapManager() if snap else AptManager()
self.service = self._ops_manager.service_manager_for(service)
self.munge = MungeManager(self._ops_manager)
self.exporter = PrometheusExporterManager(self._ops_manager)
Expand Down Expand Up @@ -382,11 +393,14 @@ def type(self) -> ServiceType:
return self._service


class _SnapMungeKeyManager(MungeKeyManager):
"""Control the munge key using Snap."""
class _MungectlManager(MungeKeyManager):
"""Control the munge key using mungectl."""

def __init__(self, binpath: str) -> None:
self._binpath = binpath

def _mungectl(self, *args: str, stdin: Optional[str] = None) -> str:
"""Control munge via `slurm.mungectl ...`.
"""Control munge via `mungectl ...`.
Args:
*args: Arguments to pass to `mungectl`.
Expand All @@ -395,7 +409,7 @@ def _mungectl(self, *args: str, stdin: Optional[str] = None) -> str:
Raises:
subprocess.CalledProcessError: Raised if `mungectl` command fails.
"""
return _call("slurm.mungectl", *args, stdin=stdin)
return _call(self._binpath, *args, stdin=stdin)

def get(self) -> str:
"""Get the current munge key.
Expand Down Expand Up @@ -450,4 +464,192 @@ def _env_manager_for(self, type: ServiceType) -> _EnvManager:

def munge_key_manager(self) -> MungeKeyManager:
"""Get the `MungekeyManager` class of this ops manager."""
return _SnapMungeKeyManager()
return _MungectlManager("slurm.mungectl")


# ========================= deb manager =========================


class _SystemctlServiceManager(ServiceManager):
    """Manage one Slurm service through the `systemctl` command line tool."""

    def __init__(self, service: ServiceType) -> None:
        self._service = service

    def _systemctl(self, *args: str) -> None:
        """Run `systemctl <args...> <unit>` for the managed service via `_call`."""
        _call("systemctl", *args, self._service.value)

    def enable(self) -> None:
        """Enable the service and start it right away (`systemctl enable --now`).

        Any error raised by the `_call` helper propagates to the caller.
        """
        self._systemctl("enable", "--now")

    def disable(self) -> None:
        """Disable the service and stop it right away (`systemctl disable --now`)."""
        self._systemctl("disable", "--now")

    def restart(self) -> None:
        """Reload the service if it supports it, otherwise restart it."""
        self._systemctl("reload-or-restart")

    def active(self) -> bool:
        """Report whether `systemctl is-active` exits with code 0 for the service."""
        command = ["systemctl", "is-active", "--quiet", self._service.value]
        _logger.debug(f"Executing command {command}")
        # A non-zero exit code is an expected answer here (service not active),
        # so the command is run directly instead of through `_call`.
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
            encoding="utf-8",
        )
        exit_code = result.returncode
        _logger.debug(f"command {command} exit code: {exit_code}. output:\n{result.stdout}")
        return exit_code == 0

    @property
    def type(self) -> ServiceType:
        """Service type this manager controls."""
        return self._service


# ASCII-armored OpenPGP public key that `AptManager.install` imports for the
# `ubuntu-hpc/slurm-wlm-23.02` PPA repository. Key material: do not reformat.
UBUNTU_HPC_SLURM_WLM_KEY = """
-----BEGIN PGP PUBLIC KEY BLOCK-----
Comment: Hostname:
Version: Hockeypuck 2.2
xsFNBGTuZb8BEACtJ1CnZe6/hv84DceHv+a54y3Pqq0gqED0xhTKnbj/E2ByJpmT
NlDNkpeITwPAAN1e3824Me76Qn31RkogTMoPJ2o2XfG253RXd67MPxYhfKTJcnM3
CEkmeI4u2Lynh3O6RQ08nAFS2AGTeFVFH2GPNWrfOsGZW03Jas85TZ0k7LXVHiBs
W6qonbsFJhshvwC3SryG4XYT+z/+35x5fus4rPtMrrEOD65hij7EtQNaE8owuAju
Kcd0m2b+crMXNcllWFWmYMV0VjksQvYD7jwGrWeKs+EeHgU8ZuqaIP4pYHvoQjag
umqnH9Qsaq5NAXiuAIAGDIIV4RdAfQIR4opGaVgIFJdvoSwYe3oh2JlrLPBlyxyY
dayDifd3X8jxq6/oAuyH1h5K/QLs46jLSR8fUbG98SCHlRmvozTuWGk+e07ALtGe
sGv78ToHKwoM2buXaTTHMwYwu7Rx8LZ4bZPHdersN1VW/m9yn1n5hMzwbFKy2s6/
D4Q2ZBsqlN+5aW2q0IUmO+m0GhcdaDv8U7RVto1cWWPr50HhiCi7Yvei1qZiD9jq
57oYZVqTUNCTPxi6NeTOdEc+YqNynWNArx4PHh38LT0bqKtlZCGHNfoAJLPVYhbB
b2AHj9edYtHU9AAFSIy+HstET6P0UDxy02IeyE2yxoUBqdlXyv6FL44E+wARAQAB
zRxMYXVuY2hwYWQgUFBBIGZvciBVYnVudHUgSFBDwsGOBBMBCgA4FiEErocSHcPk
oLD4H/Aj9tDF1ca+s3sFAmTuZb8CGwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AA
CgkQ9tDF1ca+s3sz3w//RNawsgydrutcbKf0yphDhzWS53wgfrs2KF1KgB0u/H+u
6Kn2C6jrVM0vuY4NKpbEPCduOj21pTCepL6PoCLv++tICOLVok5wY7Zn3WQFq0js
Iy1wO5t3kA1cTD/05v/qQVBGZ2j4DsJo33iMcQS5AjHvSr0nu7XSvDDEE3cQE55D
87vL7lgGjuTOikPh5FpCoS1gpemBfwm2Lbm4P8vGOA4/witRjGgfC1fv1idUnZLM
TbGrDlhVie8pX2kgB6yTYbJ3P3kpC1ZPpXSRWO/cQ8xoYpLBTXOOtqwZZUnxyzHh
gM+hv42vPTOnCo+apD97/VArsp59pDqEVoAtMTk72fdBqR+BB77g2hBkKESgQIEq
EiE1/TOISioMkE0AuUdaJ2ebyQXugSHHuBaqbEC47v8t5DVN5Qr9OriuzCuSDNFn
6SBHpahN9ZNi9w0A/Yh1+lFfpkVw2t04Q2LNuupqOpW+h3/62AeUqjUIAIrmfeML
IDRE2VdquYdIXKuhNvfpJYGdyvx/wAbiAeBWg0uPSepwTfTG59VPQmj0FtalkMnN
ya2212K5q68O5eXOfCnGeMvqIXxqzpdukxSZnLkgk40uFJnJVESd/CxHquqHPUDE
fy6i2AnB3kUI27D4HY2YSlXLSRbjiSxTfVwNCzDsIh7Czefsm6ITK2+cVWs0hNQ=
=cs1s
-----END PGP PUBLIC KEY BLOCK-----
"""

# ASCII-armored OpenPGP public key that `AptManager.install` imports for the
# `ubuntu-hpc/experimental` PPA repository. Key material: do not reformat.
# NOTE(review): the armored block appears to be line-for-line the same as
# UBUNTU_HPC_SLURM_WLM_KEY; presumably both PPAs are signed by one key — confirm
# before deduplicating.
UBUNTU_HPC_EXPERIMENTAL_KEY = """
-----BEGIN PGP PUBLIC KEY BLOCK-----
Comment: Hostname:
Version: Hockeypuck 2.2
xsFNBGTuZb8BEACtJ1CnZe6/hv84DceHv+a54y3Pqq0gqED0xhTKnbj/E2ByJpmT
NlDNkpeITwPAAN1e3824Me76Qn31RkogTMoPJ2o2XfG253RXd67MPxYhfKTJcnM3
CEkmeI4u2Lynh3O6RQ08nAFS2AGTeFVFH2GPNWrfOsGZW03Jas85TZ0k7LXVHiBs
W6qonbsFJhshvwC3SryG4XYT+z/+35x5fus4rPtMrrEOD65hij7EtQNaE8owuAju
Kcd0m2b+crMXNcllWFWmYMV0VjksQvYD7jwGrWeKs+EeHgU8ZuqaIP4pYHvoQjag
umqnH9Qsaq5NAXiuAIAGDIIV4RdAfQIR4opGaVgIFJdvoSwYe3oh2JlrLPBlyxyY
dayDifd3X8jxq6/oAuyH1h5K/QLs46jLSR8fUbG98SCHlRmvozTuWGk+e07ALtGe
sGv78ToHKwoM2buXaTTHMwYwu7Rx8LZ4bZPHdersN1VW/m9yn1n5hMzwbFKy2s6/
D4Q2ZBsqlN+5aW2q0IUmO+m0GhcdaDv8U7RVto1cWWPr50HhiCi7Yvei1qZiD9jq
57oYZVqTUNCTPxi6NeTOdEc+YqNynWNArx4PHh38LT0bqKtlZCGHNfoAJLPVYhbB
b2AHj9edYtHU9AAFSIy+HstET6P0UDxy02IeyE2yxoUBqdlXyv6FL44E+wARAQAB
zRxMYXVuY2hwYWQgUFBBIGZvciBVYnVudHUgSFBDwsGOBBMBCgA4FiEErocSHcPk
oLD4H/Aj9tDF1ca+s3sFAmTuZb8CGwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AA
CgkQ9tDF1ca+s3sz3w//RNawsgydrutcbKf0yphDhzWS53wgfrs2KF1KgB0u/H+u
6Kn2C6jrVM0vuY4NKpbEPCduOj21pTCepL6PoCLv++tICOLVok5wY7Zn3WQFq0js
Iy1wO5t3kA1cTD/05v/qQVBGZ2j4DsJo33iMcQS5AjHvSr0nu7XSvDDEE3cQE55D
87vL7lgGjuTOikPh5FpCoS1gpemBfwm2Lbm4P8vGOA4/witRjGgfC1fv1idUnZLM
TbGrDlhVie8pX2kgB6yTYbJ3P3kpC1ZPpXSRWO/cQ8xoYpLBTXOOtqwZZUnxyzHh
gM+hv42vPTOnCo+apD97/VArsp59pDqEVoAtMTk72fdBqR+BB77g2hBkKESgQIEq
EiE1/TOISioMkE0AuUdaJ2ebyQXugSHHuBaqbEC47v8t5DVN5Qr9OriuzCuSDNFn
6SBHpahN9ZNi9w0A/Yh1+lFfpkVw2t04Q2LNuupqOpW+h3/62AeUqjUIAIrmfeML
IDRE2VdquYdIXKuhNvfpJYGdyvx/wAbiAeBWg0uPSepwTfTG59VPQmj0FtalkMnN
ya2212K5q68O5eXOfCnGeMvqIXxqzpdukxSZnLkgk40uFJnJVESd/CxHquqHPUDE
fy6i2AnB3kUI27D4HY2YSlXLSRbjiSxTfVwNCzDsIh7Czefsm6ITK2+cVWs0hNQ=
=cs1s
-----END PGP PUBLIC KEY BLOCK-----
"""


class AptManager(SlurmOpsManager):
    """Slurm ops manager that uses apt as its package manager."""

    def install(self) -> None:
        """Install Slurm using the Debian packages from the Ubuntu HPC PPAs.

        Registers the `ubuntu-hpc/slurm-wlm-23.02` and `ubuntu-hpc/experimental`
        PPAs for the current distribution codename, installs the `slurm-wlm`,
        `mungectl` and `prometheus-slurm-exporter` packages, and writes a systemd
        drop-in that lets `slurmd` optionally start with `--conf-server`.

        Raises:
            SlurmOpsError: Raised if a package cannot be found or fails to install.
        """
        slurm_wlm = apt.DebianRepository(
            enabled=True,
            repotype="deb",
            uri="https://ppa.launchpadcontent.net/ubuntu-hpc/slurm-wlm-23.02/ubuntu",
            release=distro.codename(),
            groups=["main"],
        )
        slurm_wlm.import_key(UBUNTU_HPC_SLURM_WLM_KEY)

        experimental = apt.DebianRepository(
            enabled=True,
            repotype="deb",
            uri="https://ppa.launchpadcontent.net/ubuntu-hpc/experimental/ubuntu",
            release=distro.codename(),
            groups=["main"],
        )
        experimental.import_key(UBUNTU_HPC_EXPERIMENTAL_KEY)

        repositories = apt.RepositoryMapping()
        repositories.add(slurm_wlm)
        repositories.add(experimental)

        apt.update()
        for package in ["slurm-wlm", "mungectl", "prometheus-slurm-exporter"]:
            try:
                apt.add_package(package)
            except apt.PackageNotFoundError as e:
                raise SlurmOpsError(f"package {package} not found. reason: {e}") from e
            except apt.PackageError as e:
                raise SlurmOpsError(
                    f"failed to install package {package}. reason: {e}"
                ) from e

        override = Path("/etc/systemd/system/slurmd.service.d/10-slurmd-conf-server.conf")
        override.parent.mkdir(exist_ok=True, parents=True)
        # The empty `ExecStart=` resets the command list inherited from the packaged
        # unit. Without it the drop-in would *add* a second ExecStart, which systemd
        # refuses for services that are not `Type=oneshot`.
        # `$$` is systemd's escape for a literal `$`, so the expansion is done by `sh`.
        override.write_text(
            textwrap.dedent(
                """
                [Service]
                ExecStart=
                ExecStart=/usr/bin/sh -c "/usr/sbin/slurmd -D -s $${SLURMD_CONFIG_SERVER:+--conf-server $$SLURMD_CONFIG_SERVER} $$SLURMD_OPTIONS"
                """
            )
        )
        # systemd only picks up new or changed drop-in files after a daemon reload.
        _call("systemctl", "daemon-reload")

    def version(self) -> str:
        """Get the current version of the `slurm-wlm` package installed on the system.

        Raises:
            SlurmOpsError: Raised if `slurm-wlm` is not installed.
        """
        try:
            return apt.DebianPackage.from_installed_package("slurm-wlm").version.number
        except apt.PackageNotFoundError as e:
            _logger.error(e)
            raise SlurmOpsError(
                "unable to retrieve slurm-wlm version. ensure slurm-wlm is correctly installed"
            ) from e

    @property
    def slurm_path(self) -> Path:
        """Get the path to the Slurm configuration directory."""
        return Path("/etc/slurm")

    def service_manager_for(self, type: ServiceType) -> ServiceManager:
        """Return the `ServiceManager` for the specified `ServiceType`."""
        return _SystemctlServiceManager(type)

    def _env_manager_for(self, type: ServiceType) -> _EnvManager:
        """Return the `_EnvManager` for the specified `ServiceType`.

        The environment file is `/etc/default/<service name>`.
        """
        return _EnvManager(file=f"/etc/default/{type.value}", prefix=type.value)

    def munge_key_manager(self) -> MungeKeyManager:
        """Get the `MungekeyManager` class of this ops manager."""
        return _MungectlManager("mungectl")
Loading

0 comments on commit b577b79

Please sign in to comment.