From 5f59167693cfceeda6c9e2cf3cef8c54e1c242e1 Mon Sep 17 00:00:00 2001 From: Martin Lehmann Date: Thu, 2 Nov 2023 15:36:22 +0100 Subject: [PATCH] feat: Add caching to the Gitlab Artifacts file handler With this change, the Gitlab Artifacts file handler will locally cache both downloaded files and "not found" responses. It now also supports the "disable_cache" parameter, which will clear the local cache if set to True. --- capellambse/filehandler/gitlab_artifacts.py | 32 ++++++++++++++++++++- pyproject.toml | 3 ++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/capellambse/filehandler/gitlab_artifacts.py b/capellambse/filehandler/gitlab_artifacts.py index 7d2b1312f..9602a6d27 100644 --- a/capellambse/filehandler/gitlab_artifacts.py +++ b/capellambse/filehandler/gitlab_artifacts.py @@ -13,11 +13,14 @@ import sys import typing as t import urllib.parse +import weakref +import diskcache import requests import requests.exceptions import urllib3.exceptions +import capellambse from capellambse import helpers, loader from . import abc @@ -104,6 +107,8 @@ class GitlabArtifactsFiles(abc.FileHandler): subdir An optional path prefix inside the artifacts archive to prepend to all file names. + disable_cache + Clear the local cache and discard any previously cached data. See Also -------- @@ -120,6 +125,7 @@ def __init__( project: str | int | None = None, branch: str | None = None, job: str | int, + disable_cache: bool = False, ) -> None: super().__init__(path, subdir=subdir) @@ -131,6 +137,15 @@ def __init__( self.__branch = branch or os.getenv("CI_DEFAULT_BRANCH") or "main" self.__job = self.__resolve_job(job) + self.__cache = diskcache.Cache( + capellambse.dirs.user_cache_path / "gitlab-artifacts" + ) + # pylint: disable-next=unused-private-member + self.__fnz = weakref.finalize(self, self.__cache.close) + + if disable_cache: + self.__cache.clear() + def __repr__(self) -> str: return ( f"{type(self).__name__}(path={self.__path!r}, token=," @@ -314,7 +329,15 @@ def open( if "w" in mode: raise TypeError("Cannot write to Gitlab artifacts") - LOGGER.debug("Opening file %r for reading", path) + cachekey = f"{self.__path}|{self.__project}|{self.__job}|{path}" + if cachekey in self.__cache: + content = self.__cache[cachekey] + if content is None: + LOGGER.debug("Negative cache hit for %r", path) + raise FileNotFoundError(errno.ENOENT, filename) + LOGGER.debug("Opening cached file %r for reading", path) + return io.BytesIO(self.__cache[cachekey]) + try: response = self.__rawget( f"{self.__path}/api/v4/projects/{self.__project}" @@ -334,8 +357,15 @@ def open( raise if err2.args != (0, 2): raise + + LOGGER.debug("File not found in artifacts archive: %r", path) + self.__cache[cachekey] = None raise FileNotFoundError(errno.ENOENT, filename) from None if response.status_code in (400, 404): + LOGGER.debug("File not found in artifacts archive: %r", path) + self.__cache[cachekey] = None raise FileNotFoundError(errno.ENOENT, filename) response.raise_for_status() + LOGGER.debug("Opening file %r for reading", path) + self.__cache[cachekey] = response.content return io.BytesIO(response.content) diff --git a/pyproject.toml b/pyproject.toml index bf7ecd7be..cb2f2c0ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,6 +60,7 @@ docs = [ test = [ "click", "cssutils", + "diskcache>=5.0", "pytest", "pytest-cov", "pyyaml>=6.0", @@ -75,6 +76,7 @@ decl = [ ] httpfiles = [ + "diskcache>=5.0", "requests>=2.25.0", ] @@ -151,6 +153,7 @@ allow_untyped_defs = true module = [ "cairosvg.*", "cssutils.*", + "diskcache.*", "lxml.*", "PIL.*", "requests_mock.*",