def test_NodeState():
    """Pin the equality and string-representation contract of NodeState."""
    # The enum member, a lookup by value, and the plain string must all be
    # interchangeable in comparisons, in either operand position.
    down_spellings = (NodeState.DOWN, NodeState("DOWN"), "DOWN")
    for lhs, rhs in itertools.product(down_spellings, repeat=2):
        assert lhs == rhs

    gecko = NodeState("GECKO")  # unknown state
    assert gecko == NodeState("GECKO")
    assert gecko == "GECKO"
    assert gecko != NodeState.DOWN

    assert NodeState.DOWN != NodeState.POWER_DOWN
    assert NodeState.DOWN != NodeState("dOwN")  # case sensitive

    # Known states render as "NodeState.<NAME>" in f-strings and repr().
    assert f"{NodeState.DOWN}" == "NodeState.DOWN"
    assert repr(NodeState.DOWN) == "NodeState.DOWN"

    # Unknown states render the same way.
    assert f"{gecko}" == "NodeState.GECKO"
    assert repr(gecko) == "NodeState.GECKO"
class NodeState(str, enum.Enum):
    """
    Representation of Slurm NODE_STATE

    For source of truth with 1-line explanation, see:
    https://github.com/SchedMD/slurm/blob/master/slurm/slurm.h#L980
    For string representation to match, see:
    https://github.com/SchedMD/slurm/blob/master/src/plugins/data_parser/v0.0.42/parsers.c#L7449

    NOTE: Copy of StrEnum implementation from python 3.11 stripped of extra features.
    Additionally support "unknown" items by defining `_missing_`.
    See tests for contract.

    Contract in short:
    * members compare equal to their plain-string value (case sensitive);
    * `NodeState("<anything>")` never fails: values that are not declared
      below yield an ad-hoc pseudo-member that is not registered in the enum;
    * `str()`, `repr()` and `format()` all render as "NodeState.<NAME>".
    """

    def __new__(cls, value):
        """Create a declared member whose string content is `value`."""
        member = str.__new__(cls, value)
        member._value_ = value
        return member

    @classmethod
    def _missing_(cls, value):
        """
        Build a pseudo-member for a state that is not declared on the class.

        Called by the enum machinery when lookup by value fails. The result
        is not added to the enum: it does not show up during iteration and
        every call creates a new object (they still compare equal as strings).
        """
        text = str(value)
        member = str.__new__(cls, text)
        # Set both attributes the Enum base class reads, so that `.name`,
        # `.value`, `str()` and `format()` work the same as for declared members.
        member._name_ = text
        member._value_ = text
        return member

    # Deliberately not decorated: the enum machinery calls this as a plain
    # function while the class body is being evaluated. It must stay above the
    # first `enum.auto()` to take effect.
    def _generate_next_value_(name, *_):
        """Make `enum.auto()` produce the member name as its value."""
        return name

    def __str__(self):
        return f"{type(self).__name__}.{self._name_}"

    def __repr__(self):
        return str(self)

    def __format__(self, format_spec):
        # Pinned explicitly: interpreters older than 3.11 format mixed-in
        # enums by their value rather than by `str()`.
        return str.__format__(str(self), format_spec)

    # *** Base states
    DOWN = enum.auto()  # node in non-usable state
    IDLE = enum.auto()  # node idle and available for use

    # *** "state flags"

    CLOUD = enum.auto()  # node comes from cloud

    # Restore a DRAINED, DRAINING, DOWN or FAILING node to service
    # (e.g. IDLE or ALLOCATED). Used in slurm_update_node() request
    RESUME = enum.auto()

    POWER_DOWN = enum.auto()  # manual node power down
    POWERED_DOWN = enum.auto()  # node is powered down
    POWERING_DOWN = enum.auto()  # node is powering down

    POWERING_UP = enum.auto()  # node is powering up

    COMPLETING = enum.auto()  # node is completing allocated job