From 0ff8cf263e25a9b0495afa2c03dee4a043964e63 Mon Sep 17 00:00:00 2001 From: Christophe Haen Date: Thu, 11 Apr 2024 14:55:46 +0200 Subject: [PATCH] Refactor a bit the ConfigSource --- .../src/diracx/core/config/__init__.py | 87 +++++++++++++++---- diracx-core/src/diracx/core/config/schema.py | 5 ++ diracx-core/tests/test_config_source.py | 31 ++++--- 3 files changed, 94 insertions(+), 29 deletions(-) diff --git a/diracx-core/src/diracx/core/config/__init__.py b/diracx-core/src/diracx/core/config/__init__.py index bf1323f7..1b188338 100644 --- a/diracx-core/src/diracx/core/config/__init__.py +++ b/diracx-core/src/diracx/core/config/__init__.py @@ -1,6 +1,11 @@ +""" +This module implements the logic of the configuration server side. +This is where all the backend abstraction and the caching logic takes place +""" + from __future__ import annotations -__all__ = ("Config", "ConfigSource", "LocalGitConfigSource") +__all__ = ("Config", "ConfigSource", "LocalGitConfigSource", "RemoteGitConfigSource") import logging import os @@ -33,6 +38,10 @@ class ConfigSourceUrl(AnyUrl): + """ + Custom class for managing URL (see validate) + """ + host_required = False @classmethod @@ -46,9 +55,13 @@ def validate(cls, value: Any, field: ModelField, config: BaseConfig) -> AnyUrl: class ConfigSource(metaclass=ABCMeta): """ - This classe is the one + This classe is the abstract base class that should be used everywhere + throughout the code. + It acts as a factory for concrete implementations + See the abstractmethods to implement a concrete class """ + # Keep a mapping between the scheme and the class __registry: dict[str, type[ConfigSource]] = {} scheme: str @@ -56,12 +69,27 @@ class ConfigSource(metaclass=ABCMeta): def __init__(self, *, backend_url: ConfigSourceUrl) -> None: ... @abstractmethod - def latest_revision(self) -> tuple[str, datetime]: ... + def latest_revision(self) -> tuple[str, datetime]: + """Must return: + * a unique hash as a string, representing the last version + * a datetime object corresponding to when the version dates + """ + ... @abstractmethod - def read_raw(self, hexsha: str, modified: datetime) -> Config: ... + def read_raw(self, hexsha: str, modified: datetime) -> Config: + """ + Return the Config object that corresponds to the + specific hash + The `modified` parameter is just added as a attribute to the config + + """ + ... def __init_subclass__(cls) -> None: + """ + Keep a record of + """ if cls.scheme in cls.__registry: raise TypeError(f"{cls.scheme=} is already define") cls.__registry[cls.scheme] = cls @@ -74,6 +102,11 @@ def create(cls): def create_from_url( cls, *, backend_url: ConfigSourceUrl | Path | str ) -> ConfigSource: + """ + Factory method to produce a concrete instance depending on + the backend URL scheme + + """ url = parse_obj_as(ConfigSourceUrl, str(backend_url)) return cls.__registry[url.scheme](backend_url=url) @@ -85,15 +118,25 @@ def read_config(self) -> Config: hexsha, modified = self.latest_revision() return self.read_raw(hexsha, modified) - def clear_caches(self): # noqa - pass + @abstractmethod + def clear_caches(self): ... class BaseGitConfigSource(ConfigSource): - scheme = "git" + """ + Base class for the git based config source + The caching is based on 2 caches: + * TTL to find the latest commit hashes + * LRU to keep in memory the last few versions + + """ + + repo: git.Repo + + # Needed because of the ConfigSource.__init_subclass__ + scheme = "basegit" def __init__(self, *, backend_url: ConfigSourceUrl) -> None: - self.repo: git.Repo super().__init__(backend_url=backend_url) self._latest_revision_cache: Cache = TTLCache( MAX_CS_CACHED_VERSIONS, DEFAULT_CS_CACHE_TTL @@ -115,9 +158,8 @@ def latest_revision(self) -> tuple[str, datetime]: @cachedmethod(lambda self: self._read_raw_cache) def read_raw(self, hexsha: str, modified: datetime) -> Config: """ - Returns the raw data from the git repo + :param: hexsha commit hash - :returns hexsha, commit time, data """ logger.debug("Reading %s for %s with mtime %s", self, hexsha, modified) rev = self.repo.rev_parse(hexsha) @@ -134,6 +176,11 @@ def clear_caches(self): class LocalGitConfigSource(BaseGitConfigSource): + """ + The configuration is stored on a local git repository + When running on multiple servers, the filesystem must be shared + """ + scheme = "git+file" def __init__(self, *, backend_url: ConfigSourceUrl) -> None: @@ -141,16 +188,17 @@ def __init__(self, *, backend_url: ConfigSourceUrl) -> None: if not backend_url.path: raise ValueError("Empty path for LocalGitConfigSource") - repo_location = Path(backend_url.path) - self.repo_location = repo_location - self.repo = git.Repo(repo_location) + self.repo_location = Path(backend_url.path) + self.repo = git.Repo(self.repo_location) def __hash__(self): return hash(self.repo_location) class RemoteGitConfigSource(BaseGitConfigSource): - """Clone a remote git repository on a tmp local dir""" + """ + Use a remote directory as a config source + """ scheme = "git+https" @@ -158,7 +206,9 @@ def __init__(self, *, backend_url: ConfigSourceUrl) -> None: super().__init__(backend_url=backend_url) if not backend_url: raise ValueError("No remote url for RemoteGitConfigSource") - self.remote_url = backend_url + + # git does not understand `git+https`, so we remove the `git+` part + self.remote_url = backend_url.replace("git+", "") self._temp_dir = TemporaryDirectory() self.repo_location = Path(self._temp_dir.name) self.repo = git.Repo.clone_from(self.remote_url, self.repo_location) @@ -166,9 +216,9 @@ def __init__(self, *, backend_url: ConfigSourceUrl) -> None: MAX_PULL_CACHED_VERSIONS, DEFAULT_PULL_CACHE_TTL ) - def clear_temp(self): - """Clean up temp dir""" - self._temp_dir.cleanup() + def clear_caches(self): + super().clear_caches() + self._pull_cache.clear() def __hash__(self): return hash(self.repo_location) @@ -176,6 +226,7 @@ def __hash__(self): @cachedmethod(lambda self: self._pull_cache) def _pull(self): """Git pull from remote repo""" + print("CHRIS PULL") self.repo.remotes.origin.pull() def latest_revision(self) -> tuple[str, datetime]: diff --git a/diracx-core/src/diracx/core/config/schema.py b/diracx-core/src/diracx/core/config/schema.py index 6d50dbb9..79d31002 100644 --- a/diracx-core/src/diracx/core/config/schema.py +++ b/diracx-core/src/diracx/core/config/schema.py @@ -159,5 +159,10 @@ class Config(BaseModel): Systems: Any WebApp: Any + # These 2 parameters are used for client side caching + # see the "/config/" route for details + + # hash for a unique representation of the config version _hexsha: str = PrivateAttr() + # modification date _modified: datetime = PrivateAttr() diff --git a/diracx-core/tests/test_config_source.py b/diracx-core/tests/test_config_source.py index df00b1b0..bb4a2e3b 100644 --- a/diracx-core/tests/test_config_source.py +++ b/diracx-core/tests/test_config_source.py @@ -1,27 +1,36 @@ import datetime +from urllib import request import pytest -from diracx.core.config import RemoteGitConfigSource +from diracx.core.config import ConfigSource, RemoteGitConfigSource from diracx.core.config.schema import Config -DIRACX_URL = "https://github.com/DIRACGrid/diracx-charts/" +# The diracx-chart contains a CS example +TEST_REPO = "git+https://github.com/DIRACGrid/diracx-charts/" -@pytest.fixture -def change_default_branch_and_file(monkeypatch): - monkeypatch.setattr("diracx.core.config.DEFAULT_GIT_BRANCH", "master") +def github_is_down(): + try: + + request.urlopen("https://github.com", timeout=1) + return False + except Exception: + return True + + +@pytest.mark.skipif(github_is_down(), reason="Github unavailble") +def test_remote_git_config_source(monkeypatch): + monkeypatch.setattr( "diracx.core.config.DEFAULT_CONFIG_FILE", "k3s/examples/cs.yaml", ) + remote_conf = ConfigSource.create_from_url(backend_url=TEST_REPO) + assert isinstance(remote_conf, RemoteGitConfigSource) - -def test_remote_git_config_source(change_default_branch_and_file): - RemoteConf = RemoteGitConfigSource(backend_url=DIRACX_URL) - hexsha, modified = RemoteConf.latest_revision() + hexsha, modified = remote_conf.latest_revision() assert isinstance(hexsha, str) assert isinstance(modified, datetime.datetime) - result = RemoteConf.read_raw(hexsha, modified) + result = remote_conf.read_raw(hexsha, modified) assert isinstance(result, Config) - RemoteConf.clear_temp()