Skip to content

Commit

Permalink
Refactor a bit the ConfigSource
Browse files Browse the repository at this point in the history
  • Loading branch information
chaen committed Apr 11, 2024
1 parent b045468 commit 0ff8cf2
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 29 deletions.
87 changes: 69 additions & 18 deletions diracx-core/src/diracx/core/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
"""
This module implements the logic of the configuration server side.
This is where all the backend abstraction and the caching logic take place
"""

from __future__ import annotations

__all__ = ("Config", "ConfigSource", "LocalGitConfigSource")
__all__ = ("Config", "ConfigSource", "LocalGitConfigSource", "RemoteGitConfigSource")

import logging
import os
Expand Down Expand Up @@ -33,6 +38,10 @@


class ConfigSourceUrl(AnyUrl):
"""
Custom class for managing URL (see validate)
"""

host_required = False

@classmethod
Expand All @@ -46,22 +55,41 @@ def validate(cls, value: Any, field: ModelField, config: BaseConfig) -> AnyUrl:

class ConfigSource(metaclass=ABCMeta):
"""
This classe is the one
This class is the abstract base class that should be used everywhere
throughout the code.
It acts as a factory for concrete implementations
See the abstractmethods to implement a concrete class
"""

# Keep a mapping between the scheme and the class
__registry: dict[str, type[ConfigSource]] = {}
scheme: str

@abstractmethod
def __init__(self, *, backend_url: ConfigSourceUrl) -> None: ...

@abstractmethod
def latest_revision(self) -> tuple[str, datetime]: ...
def latest_revision(self) -> tuple[str, datetime]:
"""Must return:
* a unique hash as a string, representing the last version
* a datetime object corresponding to the date of that version
"""
...

@abstractmethod
def read_raw(self, hexsha: str, modified: datetime) -> Config: ...
def read_raw(self, hexsha: str, modified: datetime) -> Config:
"""
Return the Config object that corresponds to the
specific hash
The `modified` parameter is just added as an attribute to the config
"""
...

def __init_subclass__(cls) -> None:
"""
Keep a record of <scheme: class>
"""
if cls.scheme in cls.__registry:
raise TypeError(f"{cls.scheme=} is already define")
cls.__registry[cls.scheme] = cls
Expand All @@ -74,6 +102,11 @@ def create(cls):
def create_from_url(
cls, *, backend_url: ConfigSourceUrl | Path | str
) -> ConfigSource:
"""
Factory method to produce a concrete instance depending on
the backend URL scheme
"""
url = parse_obj_as(ConfigSourceUrl, str(backend_url))
return cls.__registry[url.scheme](backend_url=url)

Expand All @@ -85,15 +118,25 @@ def read_config(self) -> Config:
hexsha, modified = self.latest_revision()
return self.read_raw(hexsha, modified)

def clear_caches(self): # noqa
pass
@abstractmethod
def clear_caches(self): ...


class BaseGitConfigSource(ConfigSource):
scheme = "git"
"""
Base class for the git based config source
The caching is based on 2 caches:
* TTL to find the latest commit hashes
* LRU to keep in memory the last few versions
"""

repo: git.Repo

# Needed because of the ConfigSource.__init_subclass__
scheme = "basegit"

def __init__(self, *, backend_url: ConfigSourceUrl) -> None:
self.repo: git.Repo
super().__init__(backend_url=backend_url)
self._latest_revision_cache: Cache = TTLCache(
MAX_CS_CACHED_VERSIONS, DEFAULT_CS_CACHE_TTL
Expand All @@ -115,9 +158,8 @@ def latest_revision(self) -> tuple[str, datetime]:
@cachedmethod(lambda self: self._read_raw_cache)
def read_raw(self, hexsha: str, modified: datetime) -> Config:
"""
Returns the raw data from the git repo
:param: hexsha commit hash
:returns hexsha, commit time, data
"""
logger.debug("Reading %s for %s with mtime %s", self, hexsha, modified)
rev = self.repo.rev_parse(hexsha)
Expand All @@ -134,48 +176,57 @@ def clear_caches(self):


class LocalGitConfigSource(BaseGitConfigSource):
"""
The configuration is stored on a local git repository
When running on multiple servers, the filesystem must be shared
"""

scheme = "git+file"

def __init__(self, *, backend_url: ConfigSourceUrl) -> None:
super().__init__(backend_url=backend_url)
if not backend_url.path:
raise ValueError("Empty path for LocalGitConfigSource")

repo_location = Path(backend_url.path)
self.repo_location = repo_location
self.repo = git.Repo(repo_location)
self.repo_location = Path(backend_url.path)
self.repo = git.Repo(self.repo_location)

def __hash__(self):
return hash(self.repo_location)


class RemoteGitConfigSource(BaseGitConfigSource):
"""Clone a remote git repository on a tmp local dir"""
"""
Use a remote git repository as a config source
"""

scheme = "git+https"

def __init__(self, *, backend_url: ConfigSourceUrl) -> None:
super().__init__(backend_url=backend_url)
if not backend_url:
raise ValueError("No remote url for RemoteGitConfigSource")
self.remote_url = backend_url

# git does not understand `git+https`, so we remove the `git+` part
self.remote_url = backend_url.replace("git+", "")
self._temp_dir = TemporaryDirectory()
self.repo_location = Path(self._temp_dir.name)
self.repo = git.Repo.clone_from(self.remote_url, self.repo_location)
self._pull_cache: Cache = TTLCache(
MAX_PULL_CACHED_VERSIONS, DEFAULT_PULL_CACHE_TTL
)

def clear_temp(self):
"""Clean up temp dir"""
self._temp_dir.cleanup()
def clear_caches(self):
super().clear_caches()
self._pull_cache.clear()

def __hash__(self):
return hash(self.repo_location)

@cachedmethod(lambda self: self._pull_cache)
def _pull(self):
"""Git pull from remote repo"""
print("CHRIS PULL")
self.repo.remotes.origin.pull()

def latest_revision(self) -> tuple[str, datetime]:
Expand Down
5 changes: 5 additions & 0 deletions diracx-core/src/diracx/core/config/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,5 +159,10 @@ class Config(BaseModel):
Systems: Any
WebApp: Any

# These 2 parameters are used for client side caching
# see the "/config/" route for details

# hash for a unique representation of the config version
_hexsha: str = PrivateAttr()
# modification date
_modified: datetime = PrivateAttr()
31 changes: 20 additions & 11 deletions diracx-core/tests/test_config_source.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,36 @@
import datetime
from urllib import request

import pytest

from diracx.core.config import RemoteGitConfigSource
from diracx.core.config import ConfigSource, RemoteGitConfigSource
from diracx.core.config.schema import Config

DIRACX_URL = "https://github.com/DIRACGrid/diracx-charts/"
# The diracx-charts repository contains a CS example
TEST_REPO = "git+https://github.com/DIRACGrid/diracx-charts/"


@pytest.fixture
def change_default_branch_and_file(monkeypatch):
monkeypatch.setattr("diracx.core.config.DEFAULT_GIT_BRANCH", "master")
def github_is_down():
try:

request.urlopen("https://github.com", timeout=1)
return False
except Exception:
return True


@pytest.mark.skipif(github_is_down(), reason="Github unavailble")
def test_remote_git_config_source(monkeypatch):

monkeypatch.setattr(
"diracx.core.config.DEFAULT_CONFIG_FILE",
"k3s/examples/cs.yaml",
)
remote_conf = ConfigSource.create_from_url(backend_url=TEST_REPO)
assert isinstance(remote_conf, RemoteGitConfigSource)


def test_remote_git_config_source(change_default_branch_and_file):
RemoteConf = RemoteGitConfigSource(backend_url=DIRACX_URL)
hexsha, modified = RemoteConf.latest_revision()
hexsha, modified = remote_conf.latest_revision()
assert isinstance(hexsha, str)
assert isinstance(modified, datetime.datetime)
result = RemoteConf.read_raw(hexsha, modified)
result = remote_conf.read_raw(hexsha, modified)
assert isinstance(result, Config)
RemoteConf.clear_temp()

0 comments on commit 0ff8cf2

Please sign in to comment.