Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(slurm_ops): add acct_gather.conf configuration file editor to SlurmctldManager #56

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# lib deps
slurmutils ~= 0.8.0
slurmutils ~= 0.9.0
python-dotenv ~= 1.0.1
pyyaml >= 6.0.2
distro ~=1.9.0
Expand Down
33 changes: 29 additions & 4 deletions lib/charms/hpc_libs/v0/slurm_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ def _on_install(self, _) -> None:
import yaml
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric import rsa
from slurmutils.editors import cgroupconfig, slurmconfig, slurmdbdconfig
from slurmutils.models import CgroupConfig, SlurmConfig, SlurmdbdConfig
from slurmutils.editors import acctgatherconfig, cgroupconfig, slurmconfig, slurmdbdconfig
from slurmutils.models import AcctGatherConfig, CgroupConfig, SlurmConfig, SlurmdbdConfig

try:
import charms.operator_libs_linux.v0.apt as apt
Expand All @@ -96,14 +96,14 @@ def _on_install(self, _) -> None:

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 8
LIBPATCH = 9

# Charm library dependencies to fetch during `charmcraft pack`.
PYDEPS = [
"cryptography~=43.0.1",
"pyyaml>=6.0.2",
"python-dotenv~=1.0.1",
"slurmutils~=0.8.3",
"slurmutils~=0.9.0",
"distro~=1.9.0",
]

Expand Down Expand Up @@ -263,6 +263,28 @@ def edit(self) -> SlurmConfig:
yield config


class _AcctGatherConfigManager(_ConfigManager):
    """Manager for the `acct_gather.conf` configuration file.

    Every operation is delegated to the `slurmutils` `acctgatherconfig`
    editor, targeting the path held in `self._config_path`.
    """

    def load(self) -> AcctGatherConfig:
        """Read `acct_gather.conf` and return it as an `AcctGatherConfig`."""
        current = acctgatherconfig.load(self._config_path)
        return current

    def dump(self, config: AcctGatherConfig) -> None:
        """Write `config` out to the `acct_gather.conf` configuration file.

        The file is written with mode `0o600` (owner read/write only) and the
        user and group stored on this manager.

        Args:
            config: Configuration model to write to the file.
        """
        acctgatherconfig.dump(
            config,
            self._config_path,
            mode=0o600,
            user=self._user,
            group=self._group,
        )

    @contextmanager
    def edit(self) -> AcctGatherConfig:
        """Open `acct_gather.conf` for editing inside a `with` block.

        Yields the configuration object produced by the `slurmutils` editor;
        the editor is requested with mode `0o600` and the user and group
        stored on this manager.
        """
        editor = acctgatherconfig.edit(
            self._config_path,
            mode=0o600,
            user=self._user,
            group=self._group,
        )
        with editor as config:
            yield config


class _CgroupConfigManager(_ConfigManager):
"""Control the `cgroup.conf` configuration file."""

Expand Down Expand Up @@ -916,6 +938,9 @@ def __init__(self, *args, **kwargs) -> None:
self.config = _SlurmConfigManager(
self._ops_manager.etc_path / "slurm.conf", self.user, self.group
)
self.acct_gather = _AcctGatherConfigManager(
self._ops_manager.etc_path / "acct_gather.conf", self.user, self.group
)
self.cgroup = _CgroupConfigManager(
self._ops_manager.etc_path / "cgroup.conf", self.user, self.group
)
Expand Down
60 changes: 60 additions & 0 deletions tests/unit/test_slurm_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,27 @@
PartitionName=DEFAULT MaxTime=30 MaxNodes=10 State=UP
PartitionName=batch Nodes=juju-c9fc6f-2,juju-c9fc6f-3,juju-c9fc6f-4,juju-c9fc6f-5 MinNodes=4 MaxTime=120 AllowGroups=admin
"""
EXAMPLE_ACCT_GATHER_CONFIG = """#
# `acct_gather.conf` file generated at 2024-09-18 15:10:44.652017 by slurmutils.
#
EnergyIPMIFrequency=1
EnergyIPMICalcAdjustment=yes
EnergyIPMIPowerSensors=Node=16,19;Socket1=19,26;KNC=16,19
EnergyIPMIUsername=testipmiusername
EnergyIPMIPassword=testipmipassword
EnergyIPMITimeout=10
ProfileHDF5Dir=/mydir
ProfileHDF5Default=ALL
ProfileInfluxDBDatabase=acct_gather_db
ProfileInfluxDBDefault=ALL
ProfileInfluxDBHost=testhostname
ProfileInfluxDBPass=testpassword
ProfileInfluxDBRTPolicy=testpolicy
ProfileInfluxDBUser=testuser
ProfileInfluxDBTimeout=10
InfinibandOFEDPort=0
SysfsInterfaces=enp0s1
"""
EXAMPLE_CGROUP_CONFIG = """#
# `cgroup.conf` file generated at 2024-09-18 15:10:44.652017 by slurmutils.
#
Expand Down Expand Up @@ -625,6 +646,45 @@ def test_config(self, *_) -> None:
self.assertEqual(f_info.st_gid, FAKE_GROUP_GID)


class TestAcctGatherConfig(TestCase):
"""Test the `slurmctld` service acct_gather configuration manager."""

def setUp(self) -> None:
self.setUpPyfakefs()
self.manager = SlurmctldManager(snap=True)
self.fs.create_file(
"/var/snap/slurm/common/etc/slurm/acct_gather.conf",
contents=EXAMPLE_ACCT_GATHER_CONFIG,
)

def test_config(self) -> None:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be cleaner to split this into multiple tests. There's a lot of asserts in this function.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have it like this as slurmutils should be responsible for testing the nitty gritty of each editor/model with individual test functions. These unit tests are more or less for testing that we don't break the API for managing Slurm configs with slurm_ops.

If anything, we could probably make the tests shorter. The only thing is that we still have to be clever with mocking the uid and gid of the file as pyfakefs doesn't provide any abstractions for doing that.

"""Test that manager can manipulate acct_gather.conf configuration file."""
# Fake user and group that own `acct_gather.conf`.
self.manager.acct_gather._user = FAKE_USER_NAME
self.manager.acct_gather._group = FAKE_GROUP_NAME

with self.manager.acct_gather.edit() as config:
self.assertEqual(config.energy_ipmi_frequency, "1")
self.assertEqual(config.energy_ipmi_calc_adjustment, "yes")
self.assertListEqual(config.sysfs_interfaces, ["enp0s1"])

config.energy_ipmi_frequency = "2"
config.energy_ipmi_calc_adjustment = "no"
config.sysfs_interfaces = ["enp0s2"]

# Exit the context to save changes to the file
config = self.manager.acct_gather.load()
self.assertEqual(config.energy_ipmi_frequency, "2")
self.assertEqual(config.energy_ipmi_calc_adjustment, "no")
self.assertListEqual(config.sysfs_interfaces, ["enp0s2"])

# Ensure that permissions on file are correct.
f_info = Path("/var/snap/slurm/common/etc/slurm/acct_gather.conf").stat()
self.assertEqual(stat.filemode(f_info.st_mode), "-rw-------")
self.assertEqual(f_info.st_uid, FAKE_USER_UID)
self.assertEqual(f_info.st_gid, FAKE_GROUP_GID)


@patch("charms.hpc_libs.v0.slurm_ops.subprocess.run")
class TestCgroupConfig(TestCase):
"""Test the Slurmctld service cgroup config manager."""
Expand Down