Skip to content

Commit

Permalink
feat: add method to check if a service is active
Browse files Browse the repository at this point in the history
  • Loading branch information
jedel1043 authored and NucciTheBoss committed Jul 12, 2024
1 parent bacb4d3 commit 9cda74e
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 4 deletions.
15 changes: 14 additions & 1 deletion lib/charms/hpc_libs/v0/slurm_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def _on_install(self, _) -> None:
"ConfigurationManager",
"ServiceType",
"SlurmManagerBase",
"SlurmOpsError",
]

import json
Expand Down Expand Up @@ -131,7 +132,8 @@ def install() -> None:
def version() -> str:
    """Get the current version of Slurm installed on the system.

    Returns:
        The first whitespace-separated token of the `installed` field
        reported by `snap info slurm`.

    Raises:
        SlurmOpsError: If the snap info does not contain an `installed` entry.
    """
    info = yaml.safe_load(_snap("info", "slurm"))
    # Look the key up with `.get` so that a missing `installed` entry is reported as a
    # `SlurmOpsError` rather than escaping as a bare `KeyError`.
    if (ver := info.get("installed")) is None:
        raise SlurmOpsError("unable to retrieve snap info. Ensure slurm is correctly installed")
    # The field carries more than the version; keep only the leading token.
    return ver.split(maxsplit=1)[0]


Expand Down Expand Up @@ -208,6 +210,17 @@ def restart(self) -> None:
"""Restart service."""
_snap("restart", f"slurm.{self._service.value}")

def active(self) -> bool:
    """Return True if the service is active.

    The state is read from the `services` section of `snap info slurm`.

    Raises:
        SlurmOpsError: If the snap info does not contain a `services` entry.
    """
    info = yaml.safe_load(_snap("info", "slurm"))
    if (services := info.get("services")) is None:
        raise SlurmOpsError("unable to retrieve snap info. Ensure slurm is correctly installed")

    # Assume `services` contains the service, since `ServiceManager` is not exposed as a
    # public interface for now.
    # We don't do `"active" in state` because the word "active" is also part of "inactive" :)
    return "inactive" not in services[f"slurm.{self._service.value}"]


class ConfigurationManager:
"""Control configuration of a Slurm component."""
Expand Down
10 changes: 8 additions & 2 deletions tests/integration/slurm_ops/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def slurmctld() -> SlurmManagerBase:
def test_install(slurmctld: SlurmManagerBase) -> None:
"""Install Slurm using the manager."""
slurm.install()
slurmctld.enable()
slurmctld.munge.generate_key()

with open("/var/snap/slurm/common/etc/munge/munge.key", "rb") as f:
Expand All @@ -40,7 +39,7 @@ def test_rotate_key(slurmctld: SlurmManagerBase) -> None:
@pytest.mark.order(3)
def test_slurm_config(slurmctld: SlurmManagerBase) -> None:
"""Test that the slurm config can be changed."""
slurmctld.config.set({"cluster-name": "test-cluster"})
slurmctld.config.set({"slurmctld-host": "test-slurm-ops", "cluster-name": "test-cluster"})
value = slurmctld.config.get("cluster-name")
assert value == "test-cluster"

Expand All @@ -57,6 +56,13 @@ def test_slurm_config(slurmctld: SlurmManagerBase) -> None:


@pytest.mark.order(4)
def test_enable_service(slurmctld: SlurmManagerBase) -> None:
    """Check that the slurmctld service reports as active after being enabled."""
    slurmctld.enable()
    is_active = slurmctld.active()
    assert is_active


@pytest.mark.order(5)
def test_version() -> None:
"""Test that the Slurm manager can report its version."""
version = slurm.version()
Expand Down
48 changes: 47 additions & 1 deletion tests/unit/test_slurm_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from unittest.mock import patch

import charms.hpc_libs.v0.slurm_ops as slurm
from charms.hpc_libs.v0.slurm_ops import ServiceType, SlurmManagerBase
from charms.hpc_libs.v0.slurm_ops import ServiceType, SlurmManagerBase, SlurmOpsError

MUNGEKEY = b"1234567890"
MUNGEKEY_BASE64 = base64.b64encode(MUNGEKEY)
Expand All @@ -27,15 +27,35 @@
- slurm.command1
- slurm.command2
services:
slurm.logrotate: oneshot, enabled, inactive
slurm.munged: simple, enabled, active
slurm.slurm-prometheus-exporter: simple, disabled, inactive
slurm.slurmctld: simple, disabled, active
slurm.slurmd: simple, enabled, active
slurm.slurmdbd: simple, disabled, active
slurm.slurmrestd: simple, disabled, active
channels:
latest/stable: –
latest/candidate: 23.11.7 2024-06-26 (460) 114MB classic
latest/beta: ↑
latest/edge: 23.11.7 2024-06-26 (459) 114MB classic
installed: 23.11.7 (x1) 114MB classic
"""
# Sample snap info text used by the "not installed" tests: it has no `services` and no
# `installed` entries.
SLURM_INFO_NOT_INSTALLED = """
name: slurm
summary: "Slurm: A Highly Scalable Workload Manager"
publisher: –
store-url: https://snapcraft.io/slurm
license: Apache-2.0
description: |
Slurm is an open source, fault-tolerant, and highly scalable cluster
management and job scheduling system for large and small Linux clusters.
channels:
latest/stable: –
latest/candidate: 23.11.7 2024-06-26 (460) 114MB classic
latest/beta: ↑
latest/edge: 23.11.7 2024-06-26 (459) 114MB classic
"""


@patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output")
Expand All @@ -61,12 +81,25 @@ def test_version(self, subcmd) -> None:
self.assertEqual(args, ["snap", "info", "slurm"])
self.assertEqual(version, "23.11.7")

def test_version_not_installed(self, subcmd) -> None:
    """Check that `version` raises `SlurmOpsError` for the not-installed snap info.

    Also verifies the command that was passed to the patched subprocess call.
    """
    subcmd.return_value = SLURM_INFO_NOT_INSTALLED.encode()
    self.assertRaises(slurm.SlurmOpsError, slurm.version)
    # First positional argument of the most recent call to the mock.
    snap_command = subcmd.call_args[0][0]
    self.assertEqual(snap_command, ["snap", "info", "slurm"])

def test_call_error(self, subcmd) -> None:
    """Check that a failing subprocess call surfaces as `SlurmOpsError`."""
    failure = subprocess.CalledProcessError(-1, cmd=[""], stderr=b"error")
    subcmd.side_effect = failure
    self.assertRaises(slurm.SlurmOpsError, slurm.install)

def test_error_message(self, *_) -> None:
    """Check that `SlurmOpsError` exposes the message it was constructed with."""
    expected = "error message!"
    error = SlurmOpsError(expected)
    self.assertEqual(error.message, expected)


@patch("charms.hpc_libs.v0.slurm_ops.subprocess.check_output")
class SlurmOpsBase:
Expand Down Expand Up @@ -101,6 +134,19 @@ def test_restart(self, subcmd, *_) -> None:
args = subcmd.call_args[0][0]
self.assertEqual(args, ["snap", "restart", f"slurm.{self.manager._service.value}"])

def test_active(self, subcmd, *_) -> None:
    """Check that the manager reports its service as active for the `SLURM_INFO` fixture."""
    subcmd.return_value = SLURM_INFO.encode()
    is_active = self.manager.active()
    self.assertTrue(is_active)

def test_active_not_installed(self, subcmd, *_) -> None:
    """Check that `active` raises `SlurmOpsError` for the not-installed snap info.

    Also verifies the command that was passed to the patched subprocess call.
    """
    subcmd.return_value = SLURM_INFO_NOT_INSTALLED.encode()
    self.assertRaises(slurm.SlurmOpsError, self.manager.active)
    # First positional argument of the most recent call to the mock.
    snap_command = subcmd.call_args[0][0]
    self.assertEqual(snap_command, ["snap", "info", "slurm"])

def test_get_options(self, subcmd) -> None:
"""Test that the manager correctly collects all requested configuration options."""
subcmd.return_value = '{"%(name)s.key1": "value1", "%(name)s.key2": "value2"}' % {
Expand Down

0 comments on commit 9cda74e

Please sign in to comment.