Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(slurm_ops)!: implement SackdManager for sackd service #55

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 61 additions & 56 deletions lib/charms/hpc_libs/v0/slurm_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def _on_install(self, _) -> None:
"""

__all__ = [
"SackdManager",
"SlurmOpsError",
"SlurmctldManager",
"SlurmdManager",
Expand Down Expand Up @@ -179,6 +180,7 @@ class _ServiceType(Enum):

MUNGE = "munge"
PROMETHEUS_EXPORTER = "prometheus-slurm-exporter"
SACKD = "sackd"
SLURMD = "slurmd"
SLURMCTLD = "slurmctld"
SLURMDBD = "slurmdbd"
Expand Down Expand Up @@ -425,12 +427,12 @@ class _SnapManager(_OpsManager):
def install(self) -> None:
    """Install Slurm using the `slurm` snap.

    Installs the `slurm` snap from the `latest/candidate` channel with classic
    confinement, then aliases the snap's `slurm.mungectl` command to `mungectl`.
    """
    # TODO: https://github.com/charmed-hpc/hpc-libs/issues/35 -
    #   Pin Slurm snap to stable channel.
    _snap("install", "slurm", "--channel", "latest/candidate", "--classic")
    # TODO: https://github.com/charmed-hpc/slurm-snap/issues/49 -
    #   Request automatic alias for the Slurm snap so we don't need to do it here.
    #   We will possibly need to account for a third-party Slurm snap installation
    #   where aliasing is not automatically performed.
    _snap("alias", "slurm.mungectl", "mungectl")

def version(self) -> str:
Expand Down Expand Up @@ -514,49 +516,6 @@ def _init_ubuntu_hpc_ppa() -> None:
SlurmOpsError: Raised if `apt` fails to update with Ubuntu HPC repositories enabled.
"""
_logger.debug("initializing apt to use ubuntu hpc debian package repositories")
slurm_wlm = apt.DebianRepository(
enabled=True,
repotype="deb",
uri="https://ppa.launchpadcontent.net/ubuntu-hpc/slurm-wlm-23.02/ubuntu",
release=distro.codename(),
groups=["main"],
)
slurm_wlm.import_key(
textwrap.dedent(
"""
-----BEGIN PGP PUBLIC KEY BLOCK-----
Comment: Hostname:
Version: Hockeypuck 2.2

xsFNBGTuZb8BEACtJ1CnZe6/hv84DceHv+a54y3Pqq0gqED0xhTKnbj/E2ByJpmT
NlDNkpeITwPAAN1e3824Me76Qn31RkogTMoPJ2o2XfG253RXd67MPxYhfKTJcnM3
CEkmeI4u2Lynh3O6RQ08nAFS2AGTeFVFH2GPNWrfOsGZW03Jas85TZ0k7LXVHiBs
W6qonbsFJhshvwC3SryG4XYT+z/+35x5fus4rPtMrrEOD65hij7EtQNaE8owuAju
Kcd0m2b+crMXNcllWFWmYMV0VjksQvYD7jwGrWeKs+EeHgU8ZuqaIP4pYHvoQjag
umqnH9Qsaq5NAXiuAIAGDIIV4RdAfQIR4opGaVgIFJdvoSwYe3oh2JlrLPBlyxyY
dayDifd3X8jxq6/oAuyH1h5K/QLs46jLSR8fUbG98SCHlRmvozTuWGk+e07ALtGe
sGv78ToHKwoM2buXaTTHMwYwu7Rx8LZ4bZPHdersN1VW/m9yn1n5hMzwbFKy2s6/
D4Q2ZBsqlN+5aW2q0IUmO+m0GhcdaDv8U7RVto1cWWPr50HhiCi7Yvei1qZiD9jq
57oYZVqTUNCTPxi6NeTOdEc+YqNynWNArx4PHh38LT0bqKtlZCGHNfoAJLPVYhbB
b2AHj9edYtHU9AAFSIy+HstET6P0UDxy02IeyE2yxoUBqdlXyv6FL44E+wARAQAB
zRxMYXVuY2hwYWQgUFBBIGZvciBVYnVudHUgSFBDwsGOBBMBCgA4FiEErocSHcPk
oLD4H/Aj9tDF1ca+s3sFAmTuZb8CGwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AA
CgkQ9tDF1ca+s3sz3w//RNawsgydrutcbKf0yphDhzWS53wgfrs2KF1KgB0u/H+u
6Kn2C6jrVM0vuY4NKpbEPCduOj21pTCepL6PoCLv++tICOLVok5wY7Zn3WQFq0js
Iy1wO5t3kA1cTD/05v/qQVBGZ2j4DsJo33iMcQS5AjHvSr0nu7XSvDDEE3cQE55D
87vL7lgGjuTOikPh5FpCoS1gpemBfwm2Lbm4P8vGOA4/witRjGgfC1fv1idUnZLM
TbGrDlhVie8pX2kgB6yTYbJ3P3kpC1ZPpXSRWO/cQ8xoYpLBTXOOtqwZZUnxyzHh
gM+hv42vPTOnCo+apD97/VArsp59pDqEVoAtMTk72fdBqR+BB77g2hBkKESgQIEq
EiE1/TOISioMkE0AuUdaJ2ebyQXugSHHuBaqbEC47v8t5DVN5Qr9OriuzCuSDNFn
6SBHpahN9ZNi9w0A/Yh1+lFfpkVw2t04Q2LNuupqOpW+h3/62AeUqjUIAIrmfeML
IDRE2VdquYdIXKuhNvfpJYGdyvx/wAbiAeBWg0uPSepwTfTG59VPQmj0FtalkMnN
ya2212K5q68O5eXOfCnGeMvqIXxqzpdukxSZnLkgk40uFJnJVESd/CxHquqHPUDE
fy6i2AnB3kUI27D4HY2YSlXLSRbjiSxTfVwNCzDsIh7Czefsm6ITK2+cVWs0hNQ=
=cs1s
-----END PGP PUBLIC KEY BLOCK-----
"""
)
)
experimental = apt.DebianRepository(
enabled=True,
repotype="deb",
Expand Down Expand Up @@ -601,7 +560,6 @@ def _init_ubuntu_hpc_ppa() -> None:
)
)
repositories = apt.RepositoryMapping()
repositories.add(slurm_wlm)
repositories.add(experimental)

try:
Expand Down Expand Up @@ -635,10 +593,12 @@ def _install_service(self) -> None:
Raises:
SlurmOpsError: Raised if `apt` fails to install the required Slurm packages.
"""
packages = [self._service_name, "munge", "mungectl", "prometheus-slurm-exporter"]
packages = [self._service_name, "munge", "mungectl"]
match self._service_name:
case "sackd":
packages.extend(["slurm-client"])
NucciTheBoss marked this conversation as resolved.
Show resolved Hide resolved
case "slurmctld":
packages.extend(["libpmix-dev", "mailutils"])
packages.extend(["libpmix-dev", "mailutils", "prometheus-slurm-exporter"])
case "slurmd":
packages.extend(["libpmix-dev", "openmpi-bin"])
case "slurmrestd":
Expand Down Expand Up @@ -674,6 +634,28 @@ def _create_state_save_location(self) -> None:
def _apply_overrides(self) -> None:
"""Override defaults supplied provided by Slurm Debian packages."""
match self._service_name:
case "sackd":
_logger.debug("overriding default sackd service configuration")
config_override = Path(
"/etc/systemd/system/sackd.service.d/10-sackd-config-server.conf"
)
config_override.mkdir(parents=True, exist_ok=True)
NucciTheBoss marked this conversation as resolved.
Show resolved Hide resolved
config_override.write_text(
textwrap.dedent(
"""
[Service]
ExecStart=
ExecStart=/usr/bin/sh -c "/usr/sbin/sackd --systemd $${SACKD_CONFIG_SERVER:+--conf-server $$SACKD_CONFIG_SERVER} $$SACKD_OPTIONS"
"""
NucciTheBoss marked this conversation as resolved.
Show resolved Hide resolved
)
)

# TODO: https://github.com/charmed-hpc/hpc-libs/issues/54 -
# Make `sackd` create its service environment file so that we
# aren't required to manually create it here.
_logger.debug("creating sackd environment file")
self._env_file.touch(mode=0o644)
dotenv.set_key(self._env_file, "SACKD_OPTIONS", "")
case "slurmctld":
_logger.debug("overriding default slurmctld service configuration")
self._set_ulimit()
Expand Down Expand Up @@ -792,11 +774,11 @@ def _apply_overrides(self) -> None:


# TODO: https://github.com/charmed-hpc/hpc-libs/issues/36 -
# Use `jwtctl` to provide backend for generating, setting, and getting
# jwt signing key used by `slurmctld` and `slurmdbd`. This way we also
# won't need to pass the keyfile path to the `__init__` constructor.
# .
# Also, enable `jwtctl` to set the user and group for the keyfile.
# Use `jwtctl` to provide backend for generating, setting, and getting
# jwt signing key used by `slurmctld` and `slurmdbd`. This way we also
# won't need to pass the keyfile path to the `__init__` constructor.
# .
# Also, enable `jwtctl` to set the user and group for the keyfile.
class _JWTKeyManager:
"""Control the jwt signing key used by Slurm."""

Expand Down Expand Up @@ -828,7 +810,7 @@ def generate(self) -> None:


# TODO: https://github.com/charmed-hpc/mungectl/issues/5 -
# Have `mungectl` set user and group permissions on the munge.key file.
# Have `mungectl` set user and group permissions on the munge.key file.
class _MungeKeyManager:
"""Control the munge key via `mungectl ...` commands."""

Expand Down Expand Up @@ -908,6 +890,29 @@ def scontrol(*args) -> str:
return _call("scontrol", *args).stdout


class SackdManager(_SlurmManagerBase):
    """Manager for the Sackd service.

    Exposes the `SACKD_CONFIG_SERVER` entry of the `sackd` service environment
    through the `config_server` property, which can be read, assigned, and deleted.
    """

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(service=_ServiceType.SACKD, *args, **kwargs)
        self._env_manager = self._ops_manager.env_manager_for(_ServiceType.SACKD)

    @property
    def config_server(self) -> str | None:
        """Get the config server address of this Sackd node.

        Returns:
            The value stored under `SACKD_CONFIG_SERVER`, or `None` when the
            key is not set (for example after the property has been deleted).
        """
        return self._env_manager.get("SACKD_CONFIG_SERVER")

    @config_server.setter
    def config_server(self, addr: str) -> None:
        """Set the config server address of this Sackd node."""
        self._env_manager.set({"SACKD_CONFIG_SERVER": addr})

    @config_server.deleter
    def config_server(self) -> None:
        """Unset the config server address of this Sackd node."""
        self._env_manager.unset("SACKD_CONFIG_SERVER")


class SlurmctldManager(_SlurmManagerBase):
"""Manager for the Slurmctld service."""

Expand Down
6 changes: 3 additions & 3 deletions tests/integration/test_hpc_libs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ provider:
acts:
test-is-container:
name: "Test the is_container library"
run-on: jammy
run-on: noble
input:
- host-path: lib
path: lib
Expand All @@ -43,7 +43,7 @@ acts:
is_container
test-slurm-ops-snap:
name: "Test the slurm_ops library (snap)"
run-on: jammy
run-on: noble
input:
- host-path: lib
path: lib
Expand Down Expand Up @@ -76,7 +76,7 @@ acts:
slurm_ops
test-slurm-ops-apt:
name: "Test the slurm_ops library (apt)"
run-on: jammy
run-on: noble
input:
- host-path: lib
path: lib
Expand Down
38 changes: 33 additions & 5 deletions tests/unit/test_slurm_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import charms.operator_libs_linux.v0.apt as apt
import dotenv
from charms.hpc_libs.v0.slurm_ops import (
SackdManager,
SlurmctldManager,
SlurmdbdManager,
SlurmdManager,
Expand Down Expand Up @@ -272,6 +273,7 @@ class TestAptPackageManager(TestCase):

def setUp(self) -> None:
self.setUpPyfakefs()
self.sackd = SackdManager(snap=False)
self.slurmctld = SlurmctldManager(snap=False)
self.slurmd = SlurmdManager(snap=False)
self.slurmdbd = SlurmdbdManager(snap=False)
Expand Down Expand Up @@ -339,17 +341,27 @@ def test_set_ulimit(self, *_) -> None:
@patch("charms.operator_libs_linux.v0.apt.add_package")
def test_install_service(self, add_package, *_) -> None:
"""Test that `_install_service` installs the correct packages for each service."""
# Install slurmctld.
self.sackd._ops_manager._install_service()
self.assertListEqual(
add_package.call_args[0][0],
[
"sackd",
"munge",
"mungectl",
"slurm-client",
],
)

self.slurmctld._ops_manager._install_service()
self.assertListEqual(
add_package.call_args[0][0],
[
"slurmctld",
"munge",
"mungectl",
"prometheus-slurm-exporter",
"libpmix-dev",
"mailutils",
"prometheus-slurm-exporter",
],
)

Expand All @@ -360,7 +372,6 @@ def test_install_service(self, add_package, *_) -> None:
"slurmd",
"munge",
"mungectl",
"prometheus-slurm-exporter",
"libpmix-dev",
"openmpi-bin",
],
Expand All @@ -369,7 +380,7 @@ def test_install_service(self, add_package, *_) -> None:
self.slurmdbd._ops_manager._install_service()
self.assertListEqual(
add_package.call_args[0][0],
["slurmdbd", "munge", "mungectl", "prometheus-slurm-exporter"],
["slurmdbd", "munge", "mungectl"],
)

self.slurmrestd._ops_manager._install_service()
Expand All @@ -379,7 +390,6 @@ def test_install_service(self, add_package, *_) -> None:
"slurmrestd",
"munge",
"mungectl",
"prometheus-slurm-exporter",
"slurm-wlm-basic-plugins",
],
)
Expand Down Expand Up @@ -573,6 +583,24 @@ def test_scontrol(self, subcmd) -> None:
)


@patch("charms.hpc_libs.v0.slurm_ops.subprocess.run")
class TestSackdConfig(TestCase):
    """Test the `sackd` service configuration manager."""

    def setUp(self) -> None:
        # Set up the pyfakefs fake filesystem before creating any files.
        self.setUpPyfakefs()
        self.manager = SackdManager(snap=False)
        # The environment file must exist for the assertions below, which
        # read `SACKD_CONFIG_SERVER` back from `/etc/default/sackd`.
        self.fs.create_file("/etc/default/sackd")

    def test_config_server(self, *_) -> None:
        """Test that `SACKD_CONFIG_SERVER` is configured correctly."""
        # The steps are order-dependent: set, read back (through the manager
        # and directly from the file), then delete and confirm the key is gone.
        self.manager.config_server = "localhost"
        self.assertEqual(self.manager.config_server, "localhost")
        self.assertEqual(dotenv.get_key("/etc/default/sackd", "SACKD_CONFIG_SERVER"), "localhost")
        del self.manager.config_server
        self.assertIsNone(self.manager.config_server)


@patch("charms.hpc_libs.v0.slurm_ops.subprocess.run")
class TestSlurmctldConfig(TestCase):
"""Test the Slurmctld service config manager."""
Expand Down
Loading