diff --git a/.coveragerc.Linux b/.coveragerc.Linux index e8d6a900..f7f476a6 100644 --- a/.coveragerc.Linux +++ b/.coveragerc.Linux @@ -1,4 +1,4 @@ [report] -exclude_lines = +exclude_also = pragma: no cover pragma: no Linux cover diff --git a/.coveragerc.Windows b/.coveragerc.Windows index 63ca68b9..08320876 100644 --- a/.coveragerc.Windows +++ b/.coveragerc.Windows @@ -1,4 +1,4 @@ [report] -exclude_lines = +exclude_also = pragma: no cover pragma: no Windows cover diff --git a/.coveragerc.macOS b/.coveragerc.macOS index 50f658e2..c34b3d81 100644 --- a/.coveragerc.macOS +++ b/.coveragerc.macOS @@ -1,4 +1,4 @@ [report] -exclude_lines = +exclude_also = pragma: no cover pragma: no macOS cover diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index cf4a3b07..372b962e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,6 +21,9 @@ jobs: python-version: '3.9' - os: ubuntu-latest python-version: '3.11' + - os: ubuntu-latest + python-version: '3.11' + fs: minio steps: - uses: actions/checkout@v4 @@ -35,29 +38,34 @@ jobs: python -V python -m pip install --upgrade pip - - name: Install packages with file-system backend + - name: Install dependencies run: | - pip install -e . 
+ pip install -r requirements.txt pip install -r tests/requirements.txt - - name: Test file-system only package + - name: Default file system run: | - python -m pytest --cov-fail-under=0 tests/test_backend_filesystem_only.py tests/test_backend_filesystem.py + echo "AUDBACKEND_TEST_FS=dir" >> $GITHUB_ENV - - name: Install package with all dependencies + - name: Use minio file system run: | - pip install -r requirements.txt + ping -c1 -W1 play.minio.io >/dev/null && FS="minio" || FS="none" + echo "AUDBACKEND_TEST_FS=${FS}" >> $GITHUB_ENV + if: matrix.fs == 'minio' - name: Test with pytest env: ARTIFACTORY_USERNAME: ${{ secrets.ARTIFACTORY_USERNAME }} ARTIFACTORY_API_KEY: ${{ secrets.ARTIFACTORY_API_KEY }} run: | + # export AUDBACKEND_TEST_FS=${{ env.AUDBACKEND_TEST_FS }} + echo $AUDBACKEND_TEST_FS python -m pytest --cov-config=.coveragerc.${{ runner.os }} + if: ${{ env.AUDBACKEND_TEST_FS != 'none' }} - name: Upload coverage to Codecov uses: codecov/codecov-action@v4 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml - if: matrix.os == 'ubuntu-latest' + if: ${{ (matrix.os == 'ubuntu-latest') && (env.AUDBACKEND_TEST_FS != 'none') }} diff --git a/audbackend/__init__.py b/audbackend/__init__.py index 3a853bfb..85ce209b 100644 --- a/audbackend/__init__.py +++ b/audbackend/__init__.py @@ -1,19 +1,8 @@ -from audbackend import backend -from audbackend import interface -from audbackend.core.api import access -from audbackend.core.api import create -from audbackend.core.api import delete -from audbackend.core.api import register -from audbackend.core.backend.base import Base as Backend # legacy -from audbackend.core.backend.filesystem import FileSystem # legacy +from audbackend.core.base import AbstractBackend from audbackend.core.errors import BackendError -from audbackend.core.repository import Repository - -# Import optional backends (legacy) -try: - from audbackend.core.backend.artifactory import Artifactory -except ImportError: # pragma: no cover - pass +from 
audbackend.core.maven import Maven +from audbackend.core.unversioned import Unversioned +from audbackend.core.versioned import Versioned __all__ = [] diff --git a/audbackend/backend/__init__.py b/audbackend/backend/__init__.py deleted file mode 100644 index 06be3a6a..00000000 --- a/audbackend/backend/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from audbackend.core.backend.base import Base -from audbackend.core.backend.filesystem import FileSystem - -# Import optional backends -try: - from audbackend.core.backend.artifactory import Artifactory -except ImportError: # pragma: no cover - pass diff --git a/audbackend/core/api.py b/audbackend/core/api.py deleted file mode 100644 index 890fa65b..00000000 --- a/audbackend/core/api.py +++ /dev/null @@ -1,235 +0,0 @@ -import typing -import warnings - -import audeer - -from audbackend.core import utils -from audbackend.core.backend.base import Base -from audbackend.core.backend.filesystem import FileSystem -from audbackend.core.interface.base import Base as Interface -from audbackend.core.interface.versioned import Versioned - - -backends = {} -r"""Backend cache.""" - -backend_registry = {} -r"""Backend registry.""" - - -def _backend( - name: str, - host: str, - repository: str, -) -> Base: - r"""Get backend instance.""" - if name not in backend_registry: - raise ValueError( - f"A backend class with name " - f"'{name}' " - f"does not exist. " - f"Use 'audbackend.register()' to register one." 
- ) - - if name not in backends: - backends[name] = {} - if host not in backends[name]: - backends[name][host] = {} - if repository not in backends[name][host]: - backend_cls = backend_registry[name] - backend = backend_cls(host, repository) - backends[name][host][repository] = backend - - backend = backends[name][host][repository] - return backend - - -@audeer.deprecated( - removal_version="2.2.0", - alternative="Backend.__init__() of corresponding backend", -) -def access( - name: str, - host: str, - repository: str, - *, - interface: typing.Type[Interface] = Versioned, - interface_kwargs: dict = None, -) -> Interface: - r"""Access repository. - - Returns an ``interface`` instance - to access the ``repository`` - located at ``host`` - on the backend with alias ``name`` - (see :func:`audbackend.register`). - - .. Warning:: - - ``audbackend.access()`` is deprecated - and will be removed in version 2.2.0. - Repositories on backends are instead accessed - by instantiating the corresponding backend class, - and connecting to it using the ``open()`` method, - e.g. - - .. code-block:: python - - backend = audbackend.backend.FileSystem(host, repo) - backend.open() - - Args: - name: backend alias - host: host address - repository: repository name - interface: interface class - interface_kwargs: keyword arguments for interface class - - Returns: - interface object - - Raises: - BackendError: if an error is raised on the backend, - e.g. repository does not exist - ValueError: if no backend class with alias ``name`` - has been registered - - """ # noqa: E501 - backend = _backend(name, host, repository) - utils.call_function_on_backend(backend._open) - interface_kwargs = interface_kwargs or {} - return interface(backend, **interface_kwargs) - - -@audeer.deprecated( - removal_version="2.2.0", - alternative="class method Backend.create() of corresponding backend", -) -def create( - name: str, - host: str, - repository: str, -): - r"""Create repository. 
- - Creates ``repository`` - located at ``host`` - on the backend with alias ``name`` - (see :func:`audbackend.register`). - - .. note:: For legacy reasons the method - returns an (undocumented) instance of - :class:`audbackend.interface.Versioned`. - Since the return value might be removed in - a future version it is not recommended to use it. - - .. Warning:: - - ``audbackend.create()`` is deprecated - and will be removed in version 2.2.0. - Repositories on backends are instead created - by the class method ``create()`` - for the desired backend, - e.g. :meth:`audbackend.backend.FileSystem.create`. - - Args: - name: backend alias - host: host address - repository: repository name - - Raises: - BackendError: if an error is raised on the backend, - e.g. repository exists already - or cannot be created - ValueError: if no backend class with alias ``name`` - has been registered - - """ # noqa: E501 - backend = _backend(name, host, repository) - utils.call_function_on_backend(backend._create) - # for legacy reasons we return a versioned interface - return Versioned(backend) - - -@audeer.deprecated( - removal_version="2.2.0", - alternative="class method Backend.delete() of corresponding backend", -) -def delete( - name: str, - host: str, - repository: str, -): - r"""Delete repository. - - Deletes the repository - with name ``repository`` - located at ``host`` - on the backend with alias ``name``. - - .. Warning:: - - ``audbackend.delete()`` is deprecated - and will be removed in version 2.2.0. - Repositories on backends are instead deleted - by the class method ``delete()`` - for the desired backend, - e.g. :meth:`audbackend.backend.FileSystem.delete`. - - Args: - name: backend alias - host: host address - repository: repository name - - Raises: - BackendError: if an error is raised on the backend, - e.g. 
repository does not exist - ValueError: if no backend class with alias ``name`` - has been registered - - """ # noqa: E501 - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - interface = access(name, host, repository) - utils.call_function_on_backend(interface._backend._delete) - backends[name][host].pop(repository) - - -@audeer.deprecated(removal_version="2.2.0", alternative="backend classes directly") -def register( - name: str, - cls: typing.Type[Base], -): - r"""Register backend class. - - If there is already a backend class - registered under the alias ``name`` - it will be overwritten. - - .. Warning:: - - ``audbackend.register()`` is deprecated - and will be removed in version 2.2.0. - Instead of backend names - we now use backend classes, - such as :class:`audbackend.backend.FileSystem`. - - Args: - name: backend alias - cls: backend class - - """ - backend_registry[name] = cls - - -with warnings.catch_warnings(): - warnings.simplefilter("ignore") - register("file-system", FileSystem) - - # Register optional backends - try: - from audbackend.core.backend.artifactory import Artifactory - - register("artifactory", Artifactory) - except ImportError: # pragma: no cover - pass diff --git a/audbackend/core/backend/__init__.py b/audbackend/core/backend/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/audbackend/core/backend/artifactory.py b/audbackend/core/backend/artifactory.py deleted file mode 100644 index 0a411d8c..00000000 --- a/audbackend/core/backend/artifactory.py +++ /dev/null @@ -1,358 +0,0 @@ -import os -import typing - -import artifactory -import dohq_artifactory -import requests - -import audeer - -from audbackend.core import utils -from audbackend.core.backend.base import Base - - -def _deploy( - src_path: str, - dst_path: artifactory.ArtifactoryPath, - checksum: str, - *, - verbose: bool = False, -): - r"""Deploy local file as an artifact.""" - if verbose: # pragma: no cover - desc = 
audeer.format_display_message( - f"Deploy {src_path}", - pbar=False, - ) - print(desc, end="\r") - - if not dst_path.parent.exists(): - dst_path.parent.mkdir() - - with open(src_path, "rb") as fd: - dst_path.deploy(fd, md5=checksum, quote_parameters=True) - - if verbose: # pragma: no cover - # Clear progress line - print(audeer.format_display_message(" ", pbar=False), end="\r") - - -def _download( - src_path: artifactory.ArtifactoryPath, - dst_path: str, - *, - chunk: int = 4 * 1024, - verbose=False, -): - r"""Download an artifact.""" - src_size = artifactory.ArtifactoryPath.stat(src_path).size - - with audeer.progress_bar(total=src_size, disable=not verbose) as pbar: - desc = audeer.format_display_message( - "Download {}".format(os.path.basename(str(src_path))), - pbar=True, - ) - pbar.set_description_str(desc) - pbar.refresh() - - dst_size = 0 - with src_path.open() as src_fp: - with open(dst_path, "wb") as dst_fp: - while src_size > dst_size: - data = src_fp.read(chunk) - n_data = len(data) - if n_data > 0: - dst_fp.write(data) - dst_size += n_data - pbar.update(n_data) - - -class Artifactory(Base): - r"""Backend for Artifactory. - - Args: - host: host address - repository: repository name - authentication: username, password / API key / access token tuple. - If ``None``, - it requests it by calling :meth:`get_authentication` - - """ # noqa: E501 - - def __init__( - self, - host: str, - repository: str, - *, - authentication: typing.Tuple[str, str] = None, - ): - super().__init__(host, repository, authentication=authentication) - - if authentication is None: - self.authentication = self.get_authentication(host) - - # Store ArtifactoryPath object to the repository, - # when opening the backend. - self._repo = None - - # Store request.Session as handed to ArtifactoryPath - self._session = None - - @classmethod - def get_authentication(cls, host: str) -> typing.Tuple[str, str]: - """Username and password/access token for given host. 
- - Returns a username - and password / API key / access token, - which can be used to authenticate - with an Artifactory server. - - Note, API keys are deprecated - and will no longer work - with newer versions of Artifactory. - - To get the username, - password/access token combination, - the function looks first - for the two environment variables - ``ARTIFACTORY_USERNAME`` and - ``ARTIFACTORY_API_KEY``. - Otherwise, - it tries to extract missing values - from a global `config file`_. - The default path of the config file - (:file:`~/.artifactory_python.cfg`) - can be overwritten with the environment variable - ``ARTIFACTORY_CONFIG_FILE``. - If no config file exists - or if it does not contain an - entry for the ``host``, - the username is set to ``'anonymous'`` - and the password/key to an empty string. - In that case the ``host`` - has to support anonymous access, - when trying to authenticate. - - .. _`config file`: https://devopshq.github.io/artifactory/#global-configuration-file - - Args: - host: hostname of Artifactory backend - - Returns: - username, password / API key / access token tuple - - """ - username = os.getenv("ARTIFACTORY_USERNAME", None) - api_key = os.getenv("ARTIFACTORY_API_KEY", None) - config_file = os.getenv( - "ARTIFACTORY_CONFIG_FILE", - artifactory.default_config_path, - ) - config_file = audeer.path(config_file) - - if os.path.exists(config_file) and (api_key is None or username is None): - config = artifactory.read_config(config_file) - config_entry = artifactory.get_config_entry(config, host) - - if config_entry is not None: - if username is None: - username = config_entry.get("username", None) - if api_key is None: - api_key = config_entry.get("password", None) - - if username is None: - username = "anonymous" - if api_key is None: - api_key = "" - - return username, api_key - - def _checksum( - self, - path: str, - ) -> str: - r"""MD5 checksum of file on backend.""" - path = self.path(path) - checksum = 
artifactory.ArtifactoryPath.stat(path).md5 - return checksum - - def _close( - self, - ): - r"""Close connection to repository. - - An error should be raised, - if the connection to the backend - cannot be closed. - - """ - if self._session is not None: - self._session.close() - - def _collapse( - self, - path, - ): - r"""Convert to virtual path. - - // - -> - / - - """ - path = path[len(str(self.path("/"))) - 1 :] - path = path.replace("/", self.sep) - return path - - def _copy_file( - self, - src_path: str, - dst_path: str, - verbose: bool, - ): - r"""Copy file on backend.""" - src_path = self.path(src_path) - dst_path = self.path(dst_path) - if not dst_path.parent.exists(): - dst_path.parent.mkdir() - src_path.copy(dst_path) - - def _create( - self, - ): - r"""Access existing repository.""" - with requests.Session() as session: - session.auth = self.authentication - path = artifactory.ArtifactoryPath(self.host, session=session) - repo = dohq_artifactory.RepositoryLocal( - path, - self.repository, - package_type=dohq_artifactory.RepositoryLocal.GENERIC, - ) - if repo.path.exists(): - utils.raise_file_exists_error(str(repo.path)) - repo.create() - - def _date( - self, - path: str, - ) -> str: - r"""Get last modification date of file on backend.""" - path = self.path(path) - date = path.stat().mtime - date = utils.date_format(date) - return date - - def _delete( - self, - ): - r"""Delete repository and all its content.""" - with self: - self._repo.delete() - - def _exists( - self, - path: str, - ) -> bool: - r"""Check if file exists on backend.""" - path = self.path(path) - return path.exists() - - def _get_file( - self, - src_path: str, - dst_path: str, - verbose: bool, - ): - r"""Get file from backend.""" - src_path = self.path(src_path) - _download(src_path, dst_path, verbose=verbose) - - def _ls( - self, - path: str, - ) -> typing.List[str]: - r"""List all files under sub-path.""" - path = self.path(path) - if not path.exists(): - return [] - - paths = [str(x) 
for x in path.glob("**/*") if x.is_file()] - paths = [self._collapse(path) for path in paths] - - return paths - - def _move_file( - self, - src_path: str, - dst_path: str, - verbose: bool, - ): - r"""Move file on backend.""" - src_path = self.path(src_path) - dst_path = self.path(dst_path) - if not dst_path.parent.exists(): - dst_path.parent.mkdir() - src_path.move(dst_path) - - def _open( - self, - ): - r"""Open connection to backend.""" - self._session = requests.Session() - self._session.auth = self.authentication - path = artifactory.ArtifactoryPath(self.host, session=self._session) - self._repo = path.find_repository(self.repository) - if self._repo is None: - utils.raise_file_not_found_error(self.repository) - - def _owner( - self, - path: str, - ) -> str: - r"""Get owner of file on backend.""" - path = self.path(path) - owner = path.stat().modified_by - return owner - - def path( - self, - path: str, - ) -> artifactory.ArtifactoryPath: - r"""Convert to backend path. - - This extends the relative ``path`` on the backend - by :attr:`host` and :attr:`repository`, - and returns an :class:`artifactory.ArtifactoryPath` object. 
- - Args: - path: path on backend - - Returns: - Artifactory path object - - """ - path = path.replace(self.sep, "/") - if path.startswith("/"): - path = path[1:] - # path -> host/repository/path - return self._repo / path - - def _put_file( - self, - src_path: str, - dst_path: str, - checksum: str, - verbose: bool, - ): - r"""Put file to backend.""" - dst_path = self.path(dst_path) - _deploy(src_path, dst_path, checksum, verbose=verbose) - - def _remove_file( - self, - path: str, - ): - r"""Remove file from backend.""" - path = self.path(path) - path.unlink() diff --git a/audbackend/core/backend/filesystem.py b/audbackend/core/backend/filesystem.py deleted file mode 100644 index 9f53ccb7..00000000 --- a/audbackend/core/backend/filesystem.py +++ /dev/null @@ -1,190 +0,0 @@ -import datetime -import os -import shutil -import typing - -import audeer - -from audbackend.core import utils -from audbackend.core.backend.base import Base - - -class FileSystem(Base): - r"""Backend for file system. - - Args: - host: host directory - repository: repository name - - """ - - def __init__( - self, - host: str, - repository: str, - ): - super().__init__(host, repository) - - self._root = audeer.path(host, repository) + os.sep - - def _checksum( - self, - path: str, - ) -> str: - r"""MD5 checksum of file on backend.""" - path = self._expand(path) - return audeer.md5(path) - - def _collapse( - self, - path, - ): - r"""Convert to virtual path. 
- - // - -> - / - - """ - path = path[len(self._root) - 1 :] # remove host and repo - path = path.replace(os.path.sep, self.sep) - return path - - def _copy_file( - self, - src_path: str, - dst_path: str, - verbose: bool, - ): - r"""Copy file on backend.""" - src_path = self._expand(src_path) - dst_path = self._expand(dst_path) - audeer.mkdir(os.path.dirname(dst_path)) - shutil.copy(src_path, dst_path) - - def _create( - self, - ): - r"""Access existing repository.""" - if os.path.exists(self._root): - utils.raise_file_exists_error(self._root) - - audeer.mkdir(self._root) - - def _date( - self, - path: str, - ) -> str: - r"""Get last modification date of file on backend.""" - path = self._expand(path) - date = os.path.getmtime(path) - date = datetime.datetime.fromtimestamp(date) - date = utils.date_format(date) - return date - - def _delete( - self, - ): - r"""Delete repository and all its content.""" - audeer.rmdir(self._root) - - def _exists( - self, - path: str, - ) -> bool: - r"""Check if file exists on backend.""" - path = self._expand(path) - return os.path.exists(path) - - def _expand( - self, - path: str, - ) -> str: - r"""Convert to backend path. 
- - - -> - // - - """ - path = path.replace(self.sep, os.path.sep) - if path.startswith(os.path.sep): - path = path[1:] - path = os.path.join(self._root, path) - return path - - def _get_file( - self, - src_path: str, - dst_path: str, - verbose: bool, - ): - r"""Get file from backend.""" - src_path = self._expand(src_path) - shutil.copy(src_path, dst_path) - - def _ls( - self, - path: str, - ) -> typing.List[str]: - r"""List all files under sub-path.""" - path = self._expand(path) - if not os.path.exists(path): - return [] - - paths = audeer.list_file_names( - path, - recursive=True, - hidden=True, - ) - paths = [self._collapse(path) for path in paths] - - return paths - - def _move_file( - self, - src_path: str, - dst_path: str, - verbose: bool, - ): - r"""Move file on backend.""" - src_path = self._expand(src_path) - dst_path = self._expand(dst_path) - audeer.mkdir(os.path.dirname(dst_path)) - audeer.move(src_path, dst_path) - - def _open( - self, - ): - r"""Open connection to backend.""" - if not os.path.exists(self._root): - utils.raise_file_not_found_error(self._root) - - def _owner( - self, - path: str, - ) -> str: - r"""Get owner of file on backend.""" - path = self._expand(path) - owner = utils.file_owner(path) - return owner - - def _put_file( - self, - src_path: str, - dst_path: str, - checksum: str, - verbose: bool, - ): - r"""Put file to backend.""" - dst_path = self._expand(dst_path) - audeer.mkdir(os.path.dirname(dst_path)) - shutil.copy(src_path, dst_path) - - def _remove_file( - self, - path: str, - ): - r"""Remove file from backend.""" - path = self._expand(path) - os.remove(path) diff --git a/audbackend/core/backend/base.py b/audbackend/core/base.py similarity index 57% rename from audbackend/core/backend/base.py rename to audbackend/core/base.py index c67f86f5..226455d0 100644 --- a/audbackend/core/backend/base.py +++ b/audbackend/core/base.py @@ -1,63 +1,63 @@ +import abc import fnmatch -import inspect +import hashlib import os +import re import 
tempfile import typing +import fsspec +import tqdm + import audeer from audbackend.core import utils from audbackend.core.errors import BackendError -backend_not_opened_error = ( - "Call 'Backend.open()' to establish a connection to the repository first." -) - +class AbstractBackend(metaclass=abc.ABCMeta): + r"""Abstract superclass for backends. -class Base: - r"""Backend base class. + Backend implementations are expected to be compatible with or, + better, + subclass from here. - Derive from this class to implement a new backend. + Args: + fs: filesystem object + following :mod:`fsspec` specifications """ def __init__( self, - host: str, - repository: str, - *, - authentication: typing.Any = None, + fs: fsspec.AbstractFileSystem, + **kwargs, ): - self.host = host - r"""Host path.""" - self.repository = repository - r"""Repository name.""" - self.authentication = authentication - r"""Object used for authentication, e.g. username, password tuple.""" - self.opened = False - r"""If a connection to the repository has been established.""" - - def __enter__(self): - r"""Open connection via context manager.""" - self.open() - return self - - def __exit__(self, type, value, traceback): - r"""Close connection via context manager.""" - self.close() - - def __repr__(self) -> str: # noqa: D105 + self.fs = fs + """Filesystem object.""" + + def __repr__( + self, + ) -> str: + r"""String representation. + + .. + >>> backend = AbstractBackend(filesystem) + + Examples: + >>> backend + audbackend.AbstractBackend(DirFileSystem) + + """ name = self.__class__.__name__ - return f"audbackend.backend.{name}('{self.host}', '{self.repository}')" + return f"audbackend.{name}({self.fs.__class__.__name__})" def _assert_equal_checksum( self, *, path: str, path_is_local: bool, - path_ref: str, - path_ref_is_local: bool, + expected_checksum: str, ): r"""Assert checksums are equal. @@ -65,23 +65,24 @@ def _assert_equal_checksum( ``path`` is removed and an error is raised. 
+ Args: + path: path to file to check + path_is_local: if ``True`` + ``path`` is expected to be on the local disk + expected_checksum: expected checksum of ``path`` + """ if path_is_local: checksum = audeer.md5(path) else: - checksum = self.checksum(path) - - if path_ref_is_local: - checksum_ref = audeer.md5(path_ref) - else: - checksum_ref = self.checksum(path_ref) + checksum = self._checksum(path) - if checksum != checksum_ref: + if checksum != expected_checksum: if path_is_local: os.remove(path) location = "local file system" else: - self.remove_file(path) + self._remove_file(path) location = "backend" raise InterruptedError( @@ -90,24 +91,26 @@ def _assert_equal_checksum( f"has checksum " f"'{checksum}' " "when the expected checksum is " - f"'{checksum_ref}'. " + f"'{expected_checksum}'. " f"The file has been removed from the " f"{location}." ) - def _checksum( - self, - path: str, - ) -> str: # pragma: no cover - r"""MD5 checksum of file on backend.""" - raise NotImplementedError() - def checksum( self, path: str, + *args, + **kwargs, ) -> str: r"""MD5 checksum for file on backend. + Requires MD5 checksum + for comparison of the checksum across + different backends, + which is not guaranteed + by simply relying on + :meth:`fsspec.AbstractFileSystem.checksum`. + Args: path: path to file on backend @@ -123,58 +126,50 @@ def checksum( RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) - path = utils.check_path(path) - return utils.call_function_on_backend( - self._checksum, - path, - ) - - def _close( - self, - ): # pragma: no cover - r"""Close connection to repository. - - An error should be raised, - if the connection to the backend - cannot be closed. - - """ - pass - - def close( - self, - ): - r"""Close connection to backend. 
- - Raises: - BackendError: if an error is raised on the backend - - """ - if self.opened: - utils.call_function_on_backend(self._close) - self.opened = False + raise NotImplementedError - def _copy_file( + def _checksum( self, - src_path: str, - dst_path: str, - verbose: bool, - ): - r"""Copy file on backend. - - A default implementation is provided, - which temporarily gets the file from the backend - and afterward puts it to the new location. - It is recommended to overwrite the function - if backend supports a native way to copy files. + path: str, + ) -> str: + # Most filesystem objects do not implement MD5 checksum, + # but use the standard implementation based on the info dict + # (https://github.com/fsspec/filesystem_spec/blob/76ca4a68885d572880ac6800f079738df562f02c/fsspec/spec.py#L692C16-L692C50): + # int(tokenize(self.info(path)), 16) + # + # We rely on the MD5 checksum + # to decide if a local file is identical to one on the backend. + # This information is then used to decide if `put_file()` + # has to overwrite a file on the backend or not. + + # Implementation compatible with audeer.md5() + def md5sum(path: str) -> str: + """Implementation compatible with audeer.md5(). 
+ + Args: + path: path on backend + + Returns: + MD5 sum + + """ + hasher = hashlib.md5() + chunk_size = 8192 + with self.fs.open(path) as fp: + while True: + data = fp.read(chunk_size) + if not data: + break + hasher.update(data) + return hasher.hexdigest() + + info = utils.call_function_on_backend(self.fs.info, path) + if "ETag" in info: + md5 = ["ETag"][1:-1] # pragma: nocover + else: + md5 = utils.call_function_on_backend(md5sum, path) - """ - with tempfile.TemporaryDirectory() as tmp: - tmp_path = audeer.path(tmp, "~") - tmp_path = self.get_file(src_path, tmp_path, verbose=verbose) - self.put_file(tmp_path, dst_path, verbose=verbose) + return md5 def copy_file( self, @@ -183,6 +178,7 @@ def copy_file( *, validate: bool = False, verbose: bool = False, + **kwargs, ): r"""Copy file on backend. @@ -216,89 +212,45 @@ def copy_file( RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) - - src_path = utils.check_path(src_path) - dst_path = utils.check_path(dst_path) - - if src_path != dst_path and ( - not self.exists(dst_path) - or self.checksum(src_path) != self.checksum(dst_path) - ): - utils.call_function_on_backend( - self._copy_file, - src_path, - dst_path, - verbose, - ) - - if validate: - self._assert_equal_checksum( - path=dst_path, - path_is_local=False, - path_ref=src_path, - path_ref_is_local=False, - ) + raise NotImplementedError - def _create( + def _copy_file( self, - ): # pragma: no cover - r"""Create a new repository. - - * If repository exists already an error should be raised - - """ - raise NotImplementedError() - - @classmethod - def create( - cls, - host: str, - repository: str, - *, - authentication: typing.Any = None, + src_path: str, + dst_path: str, + validate: bool, + verbose: bool, ): - r"""Create repository. - - Creates ``repository`` - located at ``host`` - on the backend. 
- - Args: - host: host address - repository: repository name - authentication: object used for authentication, - e.g. a tuple with username and password - - Raises: - BackendError: if an error is raised on the backend, - e.g. repository exists already - or cannot be created - - """ - signature = inspect.signature(cls) - if "authentication" in signature.parameters: - backend = cls(host, repository, authentication=authentication) - else: - backend = cls(host, repository) - utils.call_function_on_backend(backend._create) - - def _date( - self, - path: str, - ) -> str: # pragma: no cover - r"""Last modification date of file on backend. - - * Return empty string if date cannot be determined - * Format should be '%Y-%m-%d' + if src_path == dst_path: + return - """ - raise NotImplementedError() + src_checksum = self._checksum(src_path) + dst_exists = self._exists(dst_path) + + def copy(src_path, dst_path): + # Copy only if dst_path does not exist or has a different checksum + if not dst_exists or src_checksum != self._checksum(dst_path): + # Remove dst_path if existent + if dst_exists: + self._remove_file(dst_path) + # Ensure sub-paths exist + self.fs.makedirs(os.path.dirname(dst_path), exist_ok=True) + self.fs.copy(src_path, dst_path, callback=pbar("Copy file", verbose)) + + utils.call_function_on_backend(copy, src_path, dst_path) + + if validate: + self._assert_equal_checksum( + path=dst_path, + path_is_local=False, + expected_checksum=src_checksum, + ) def date( self, path: str, + *args, + **kwargs, ) -> str: r"""Last modification date of file on backend. 
@@ -320,65 +272,22 @@ def date( RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) - path = utils.check_path(path) - return utils.call_function_on_backend( - self._date, - path, - ) + raise NotImplementedError - def _delete( - self, - ): # pragma: no cover - r"""Delete repository and all its content.""" - raise NotImplementedError() - - @classmethod - def delete( - cls, - host: str, - repository: str, - *, - authentication: typing.Any = None, - ): - r"""Delete repository. - - Deletes ``repository`` - located at ``host`` - on the backend. - - Args: - host: host address - repository: repository name - authentication: access token - for possible authentication, - e.g. username, password tuple - - Raises: - BackendError: if an error is raised on the backend, - e.g. repository does not exist - - """ - signature = inspect.signature(cls) - if "authentication" in signature.parameters: - backend = cls(host, repository, authentication=authentication) - else: - backend = cls(host, repository) - utils.call_function_on_backend(backend._delete) - - def _exists( + def _date( self, path: str, - ) -> bool: # pragma: no cover - r"""Check if file exists on backend.""" - raise NotImplementedError() + ) -> str: + date = utils.call_function_on_backend(self.fs.modified, path) + date = utils.date_format(date) + return date def exists( self, path: str, - *, + *args, suppress_backend_errors: bool = False, + **kwargs, ) -> bool: r"""Check if file exists on backend. 
@@ -403,11 +312,15 @@ def exists( RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) - path = utils.check_path(path) + raise NotImplementedError + + def _exists( + self, + path: str, + suppress_backend_errors: bool = False, + ) -> bool: return utils.call_function_on_backend( - self._exists, + self.fs.exists, path, suppress_backend_errors=suppress_backend_errors, fallback_return_value=False, @@ -417,10 +330,11 @@ def get_archive( self, src_path: str, dst_root: str, - *, + *args, tmp_root: str = None, validate: bool = False, verbose: bool = False, + **kwargs, ) -> typing.List[str]: r"""Get archive from backend and extract. @@ -466,46 +380,30 @@ def get_archive( RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) - - src_path = utils.check_path(src_path) + src_path = self._path(src_path) + return self._get_archive(src_path, dst_root, tmp_root, validate, verbose) - with tempfile.TemporaryDirectory(dir=tmp_root) as tmp: - tmp_root = audeer.path(tmp, os.path.basename(dst_root)) - local_archive = os.path.join( - tmp_root, - os.path.basename(src_path), - ) - self.get_file( - src_path, - local_archive, - validate=validate, - verbose=verbose, - ) - - return audeer.extract_archive( - local_archive, - dst_root, - verbose=verbose, - ) - - def _get_file( + def _get_archive( self, src_path: str, - dst_path: str, + dst_root: str, + tmp_root: str, + validate: bool, verbose: bool, - ): # pragma: no cover - r"""Get file from backend.""" - raise NotImplementedError() + ) -> str: + with tempfile.TemporaryDirectory(dir=tmp_root) as tmp: + local_archive = os.path.join(tmp, os.path.basename(src_path)) + self._get_file(src_path, local_archive, validate, verbose) + return audeer.extract_archive(local_archive, dst_root, verbose=verbose) def get_file( self, src_path: str, dst_path: str, - *, + *args, validate: bool = False, verbose: bool = False, + **kwargs, ) -> str: 
r"""Get file from backend. @@ -550,35 +448,45 @@ def get_file( RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) + raise NotImplementedError - src_path = utils.check_path(src_path) + def _get_file( + self, + src_path: str, + dst_path: str, + validate: bool, + verbose: bool, + ) -> str: dst_path = audeer.path(dst_path) + + # Raise error if dst_path is a folder if os.path.isdir(dst_path): raise utils.raise_is_a_directory(dst_path) - dst_root = os.path.dirname(dst_path) - audeer.mkdir(dst_root) - - if not os.access(dst_root, os.W_OK) or ( - os.path.exists(dst_path) and not os.access(dst_path, os.W_OK) - ): # pragma: no Windows cover - msg = f"Permission denied: '{dst_path}'" - raise PermissionError(msg) - - if not os.path.exists(dst_path) or audeer.md5(dst_path) != self.checksum( - src_path - ): - # get file to a temporary directory first, - # only on success move to final destination + # Get file only if it does not exist or has different checksum + src_checksum = self._checksum(src_path) + if not os.path.exists(dst_path) or src_checksum != audeer.md5(dst_path): + # Ensure sub-paths of dst_path exists + dst_root = os.path.dirname(dst_path) + audeer.mkdir(dst_root) + + # Raise error if we don't have write permissions to dst_root + if not os.access(dst_root, os.W_OK) or ( + os.path.exists(dst_path) and not os.access(dst_path, os.W_OK) + ): # pragma: no Windows cover + msg = f"Permission denied: '{dst_path}'" + raise PermissionError(msg) + + # Get file to a temporary directory first, + # only on success move to final destination. 
+ # This also overwrites a potential existing dst_path with tempfile.TemporaryDirectory(dir=dst_root) as tmp: tmp_path = audeer.path(tmp, "~") utils.call_function_on_backend( - self._get_file, + self.fs.get_file, src_path, tmp_path, - verbose, + callback=pbar("Get file", verbose), ) audeer.move_file(tmp_path, dst_path) @@ -586,8 +494,7 @@ def get_file( self._assert_equal_checksum( path=dst_path, path_is_local=True, - path_ref=src_path, - path_ref_is_local=False, + expected_checksum=src_checksum, ) return dst_path @@ -597,7 +504,7 @@ def join( path: str, *paths, ) -> str: - r"""Join to (sub-)path on backend. + r"""Join to path on backend. Args: path: first part of path @@ -611,44 +518,38 @@ def join( or does not start with ``'/'``, or if joined path contains invalid character + .. + >>> backend = AbstractBackend(filesystem) + + Examples: + >>> backend.join("/", "file.txt") + '/file.txt' + >>> backend.join("/sub", "file.txt") + '/sub/file.txt' + >>> backend.join("//sub//", "/", "", None, "/file.txt") + '/sub/file.txt' + """ - path = utils.check_path(path, allow_sub_path=True) + path = self._path(path, allow_sub_path=True) paths = [path] + [p for p in paths] paths = [path for path in paths if path] # remove empty or None path = self.sep.join(paths) - path = utils.check_path(path, allow_sub_path=True) + path = self._path(path, allow_sub_path=True) return path - def _ls( - self, - path: str, - ) -> typing.List[str]: # pragma: no cover - r"""List all files under sub-path. - - If ``path`` does not exist - an empty list can be returned. - - """ - raise NotImplementedError() - def ls( self, path: str = "/", - *, + *args, pattern: str = None, suppress_backend_errors: bool = False, + **kwargs, ) -> typing.List[str]: r"""List files on backend. - Returns a sorted list of tuples - with path and version. - If a full path - (e.g. ``/sub/file.ext``) - is provided, - all versions of the path are returned. If a sub-path (e.g. 
``/sub/``) is provided, @@ -670,7 +571,7 @@ def ls( and return an empty list Returns: - list of tuples (path, version) + list of files Raises: BackendError: if ``suppress_backend_errors`` is ``False`` @@ -680,28 +581,29 @@ def ls( does not match ``'[A-Za-z0-9/._-]+'`` RuntimeError: if backend was not opened - """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) - - path = utils.check_path(path, allow_sub_path=True) - - if path.endswith("/"): # find files under sub-path - paths = utils.call_function_on_backend( - self._ls, - path, - suppress_backend_errors=suppress_backend_errors, - fallback_return_value=[], - ) + """ # noqa: E501 + raise NotImplementedError - else: # find path - if self.exists(path): - paths = [path] - else: - paths = [] + def _ls( + self, + path: str, + suppress_backend_errors: bool, + pattern: str = None, + ) -> typing.List[str]: + # Find all files under path + paths = utils.call_function_on_backend( + self.fs.find, + path, + suppress_backend_errors=suppress_backend_errors, + fallback_return_value=[], + ) + # Sort and ensure each path starts with a sep + paths = sorted( + [path if path.startswith(self.sep) else self.sep + path for path in paths] + ) if not paths: - if path != "/" and not suppress_backend_errors: + if path != self.sep and not suppress_backend_errors: # if the path does not exist # we raise an error try: @@ -711,38 +613,24 @@ def ls( return [] - paths = sorted(paths) - + # Filter for matching pattern if pattern: - paths = [p for p in paths if fnmatch.fnmatch(os.path.basename(p), pattern)] + paths = [ + path + for path in paths + if fnmatch.fnmatch(os.path.basename(path), pattern) + ] return paths - def _move_file( - self, - src_path: str, - dst_path: str, - verbose: bool, - ): - r"""Move file on backend. - - A default implementation is provided, - which calls `:func:audbackend.Base.copy_file` - and afterward removes the source file from the backend. 
- It is recommended to overwrite the function - if backend supports a native way to move files. - - """ - self.copy_file(src_path, dst_path, verbose=verbose) - self.remove_file(src_path) - def move_file( self, src_path: str, dst_path: str, - *, + *args, validate: bool = False, verbose: bool = False, + **kwargs, ): r"""Move file on backend. @@ -780,131 +668,90 @@ def move_file( RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) - - src_path = utils.check_path(src_path) - dst_path = utils.check_path(dst_path) + raise NotImplementedError - if src_path == dst_path: - return - - if not self.exists(dst_path) or self.checksum(src_path) != self.checksum( - dst_path - ): - if validate: - self.copy_file( - src_path, - dst_path, - validate=True, - verbose=verbose, - ) - self.remove_file(src_path) - else: - utils.call_function_on_backend( - self._move_file, - src_path, - dst_path, - verbose, - ) - else: - self.remove_file(src_path) - - def _open( - self, - ): # pragma: no cover - r"""Open connection to backend. - - If repository does not exist, - or the backend cannot be opened, - an error should be raised. - - """ - pass - - def open( + def _move_file( self, + src_path: str, + dst_path: str, + validate: bool, + verbose: bool, ): - r"""Open connection to backend. - - Repository must exist, - use - :func:`audbackend.backend.Base.create` - to create it. - Finally, - use - :func:`audbackend.backend.Base.close` - to close the connection. - Instead of explicitly calling - :func:`audbackend.backend.Base.open` - and - :func:`audbackend.backend.Base.close` - it is good practice to use a with_ statement. - - Raises: - BackendError: if an error is raised on the backend, - e.g. ``repository`` does not exist - - .. 
_with: https://docs.python.org/3/reference/compound_stmts.html#with - - """ - if not self.opened: - utils.call_function_on_backend(self._open) - self.opened = True - - def _owner( - self, - path: str, - ) -> str: # pragma: no cover - r"""Owner of file on backend. - - * Return empty string if owner cannot be determined + if src_path == dst_path: + return - """ - raise NotImplementedError() + # To support validation, we first copy the file + self._copy_file(src_path, dst_path, validate, verbose) + self._remove_file(src_path) - def owner( + def path( self, path: str, + *args, + allow_sub_path: bool = False, + **kwargs, ) -> str: - r"""Owner of file on backend. + r"""Resolved backend path. - If the owner of the file - cannot be determined, - an empty string is returned. + Resolved path as handed to the filesystem object. Args: - path: path to file on backend + path: path on backend + allow_sub_path: if ``path`` is allowed + to point to a sub-path + instead of a file Returns: - owner + path as handed to the filesystem object Raises: - BackendError: if an error is raised on the backend, - e.g. ``path`` does not exist ValueError: if ``path`` does not start with ``'/'``, - ends on ``'/'``, + ends on ``'/'`` when ``allow_sub_path`` is ``False``, or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) - path = utils.check_path(path) - return utils.call_function_on_backend( - self._owner, - path, - ) + raise NotImplementedError + + def _path(self, path: str, allow_sub_path: bool = False) -> str: + # Assert path starts with sep, but not ends on it + if not path.startswith(self.sep): + raise ValueError( + f"Invalid backend path '{path}', " f"must start with '{self.sep}'." + ) + if not allow_sub_path and path.endswith(self.sep): + raise ValueError( + f"Invalid backend path '{path}', " f"must not end on '{self.sep}'." + ) + + # Check for allowed characters. 
+ # This is mainly motivated by the Artifactory filesystem, + # which allows only a very limited amount of characters + allowed_chars = "[A-Za-z0-9/._-]+" + if path and re.compile(allowed_chars).fullmatch(path) is None: + raise ValueError( + f"Invalid backend path '{path}', " f"does not match '{allowed_chars}'." + ) + + # Remove immediately consecutive seps + is_sub_path = path.endswith(self.sep) + paths = path.split(self.sep) + paths = [path for path in paths if path] + path = self.sep + self.sep.join(paths) + if is_sub_path and not path.endswith(self.sep): + path += self.sep + + return path def put_archive( self, src_root: str, dst_path: str, - *, + *args, files: typing.Union[str, typing.Sequence[str]] = None, tmp_root: str = None, validate: bool = False, verbose: bool = False, + **kwargs, ): r"""Create archive and put on backend. @@ -954,10 +801,18 @@ def put_archive( RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) + dst_path = self._path(dst_path) + self._put_archive(src_root, dst_path, files, tmp_root, validate, verbose) - dst_path = utils.check_path(dst_path) + def _put_archive( + self, + src_root: str, + dst_path: str, + files: typing.Union[str, typing.Sequence[str]], + tmp_root: str, + validate: bool, + verbose: bool, + ): src_root = audeer.path(src_root) if tmp_root is not None: @@ -973,31 +828,16 @@ def put_archive( archive, verbose=verbose, ) - - self.put_file( - archive, - dst_path, - validate=validate, - verbose=verbose, - ) - - def _put_file( - self, - src_path: str, - dst_path: str, - checksum: str, - verbose: bool, - ): # pragma: no cover - r"""Put file to backend.""" - raise NotImplementedError() + self._put_file(archive, dst_path, validate, verbose) def put_file( self, src_path: str, dst_path: str, - *, + *args, validate: bool = False, verbose: bool = False, + **kwargs, ): r"""Put file on backend. 
@@ -1031,45 +871,45 @@ def put_file( RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) + raise NotImplementedError - dst_path = utils.check_path(dst_path) + def _put_file( + self, + src_path: str, + dst_path: str, + validate: bool, + verbose: bool, + ): if not os.path.exists(src_path): utils.raise_file_not_found_error(src_path) elif os.path.isdir(src_path): raise utils.raise_is_a_directory(src_path) - checksum = audeer.md5(src_path) - - # skip if file with same checksum already exists - if not self.exists(dst_path) or self.checksum(dst_path) != checksum: - utils.call_function_on_backend( - self._put_file, - src_path, - dst_path, - checksum, - verbose, + src_checksum = audeer.md5(src_path) + dst_exists = self._exists(dst_path) + + def put(src_path, dst_path): + # skip if file with same checksum already exists + if not dst_exists or src_checksum != self._checksum(dst_path): + if dst_exists: + self._remove_file(dst_path) + # Ensure sub-paths exist + self.fs.makedirs(os.path.dirname(dst_path), exist_ok=True) + self.fs.put_file(src_path, dst_path, callback=pbar("Put file", verbose)) + + utils.call_function_on_backend(put, src_path, dst_path) + if validate: + self._assert_equal_checksum( + path=dst_path, + path_is_local=False, + expected_checksum=src_checksum, ) - if validate: - self._assert_equal_checksum( - path=dst_path, - path_is_local=False, - path_ref=src_path, - path_ref_is_local=True, - ) - - def _remove_file( - self, - path: str, - ): # pragma: no cover - r"""Remove file from backend.""" - raise NotImplementedError() - def remove_file( self, path: str, + *args, + **kwargs, ): r"""Remove file from backend. 
@@ -1082,25 +922,34 @@ def remove_file( ValueError: if ``path`` does not start with ``'/'``, ends on ``'/'``, or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened """ - if not self.opened: - raise RuntimeError(backend_not_opened_error) - path = utils.check_path(path) - utils.call_function_on_backend( - self._remove_file, - path, - ) + raise NotImplementedError + + def _remove_file( + self, + path: str, + ): + utils.call_function_on_backend(self.fs.rm_file, path) @property - def sep(self) -> str: + def sep( + self, + ) -> str: r"""File separator on backend. - Returns: file separator + Returns: + file separator + + .. + >>> backend = AbstractBackend(filesystem) + + Examples: + >>> backend.sep + '/' """ - return utils.BACKEND_SEPARATOR + return "/" def split( self, @@ -1118,10 +967,42 @@ def split( ValueError: if ``path`` does not start with ``'/'`` or does not match ``'[A-Za-z0-9/._-]+'`` - """ - path = utils.check_path(path, allow_sub_path=True) + .. + >>> backend = AbstractBackend(filesystem) + + Examples: + >>> backend.split("/") + ('/', '') + >>> backend.split("/file.txt") + ('/', 'file.txt') + >>> backend.split("/sub/") + ('/sub/', '') + >>> backend.split("/sub//file.txt") + ('/sub/', 'file.txt') + """ + path = self._path(path, allow_sub_path=True) root = self.sep.join(path.split(self.sep)[:-1]) + self.sep basename = path.split(self.sep)[-1] return root, basename + + +def pbar( + desc: str, + verbose: bool, +) -> tqdm.tqdm: + r"""Progress bar for fsspec callbacks. 
+ + Args: + desc: description of progress bar + verbose: if ``False`` don't show progress bar + + """ + return fsspec.callbacks.TqdmCallback( + tqdm_kwargs={ + "desc": desc, + "disable": not verbose, + }, + tqdm_cls=audeer.progress_bar, + ) diff --git a/audbackend/core/conftest.py b/audbackend/core/conftest.py index 4d533f68..06a3e857 100644 --- a/audbackend/core/conftest.py +++ b/audbackend/core/conftest.py @@ -1,50 +1,42 @@ import datetime -import os -import tempfile +from doctest import ELLIPSIS import pytest +from sybil import Sybil +from sybil.parsers.doctest import DocTestParser import audeer import audbackend +from tests.conftest import filesystem # noqa: F401 -class DoctestFileSystem(audbackend.backend.FileSystem): - def _date( - self, - path: str, - ) -> str: +@pytest.fixture(scope="function") +def mock_date(): + r"""Custom date method to return a fixed date.""" + + def date(path: str) -> str: date = datetime.datetime(1991, 2, 20) date = audbackend.core.utils.date_format(date) return date - def _owner( - self, - path: str, - ) -> str: - return "doctest" + yield date @pytest.fixture(scope="function", autouse=True) -def prepare_docstring_tests(doctest_namespace): - with tempfile.TemporaryDirectory() as tmp: - # Change to tmp dir - current_dir = os.getcwd() - os.chdir(tmp) - # Prepare backend - audeer.mkdir("host") - audbackend.backend.FileSystem.create("host", "repo") - # Provide example file `src.txt` - audeer.touch("src.txt") - # Provide DoctestFileSystem as FileSystem, - # and audbackend - # in docstring examples - doctest_namespace["DoctestFileSystem"] = DoctestFileSystem - doctest_namespace["audbackend"] = audbackend - - yield - - # Remove backend - audbackend.backend.FileSystem.delete("host", "repo") - # Change back to current dir - os.chdir(current_dir) +def prepare_docstring_tests(tmpdir, monkeypatch): + r"""Code to be run before each doctest.""" + # Change to tmp dir + monkeypatch.chdir(tmpdir) + + # Provide example file `src.txt` + 
audeer.touch("src.txt") + + yield + + +pytest_collect_file = Sybil( + parsers=[DocTestParser(optionflags=ELLIPSIS)], + pattern="*.py", + fixtures=["filesystem", "mock_date", "prepare_docstring_tests"], +).pytest() diff --git a/audbackend/core/errors.py b/audbackend/core/errors.py index ab7bd2a2..720d68e3 100644 --- a/audbackend/core/errors.py +++ b/audbackend/core/errors.py @@ -4,27 +4,14 @@ class BackendError(Exception): Args: exception: exception raised by backend - .. Prepare backend and interface for docstring examples - - >>> import audeer - >>> audeer.rmdir("host", "repo") - >>> _ = audeer.mkdir("host", "repo") - Examples: - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> try: - ... interface = audbackend.interface.Unversioned(backend) - ... interface.checksum("/does/not/exist") - ... except BackendError as ex: - ... ex.exception - FileNotFoundError(2, 'No such file or directory') + >>> import audbackend >>> try: - ... interface = audbackend.interface.Versioned(backend) - ... interface.checksum("/does/not/exist", "1.0.0") + ... backend = audbackend.Unversioned(filesystem) + ... backend.checksum("/does/not/exist") ... except BackendError as ex: ... ex.exception - FileNotFoundError(2, 'No such file or directory') + FileNotFoundError(2, '...') """ diff --git a/audbackend/core/interface/__init__.py b/audbackend/core/interface/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/audbackend/core/interface/base.py b/audbackend/core/interface/base.py deleted file mode 100644 index fa3b67dc..00000000 --- a/audbackend/core/interface/base.py +++ /dev/null @@ -1,169 +0,0 @@ -import typing - -from audbackend.core.backend.base import Base as Backend - - -class Base: - r"""Interface base class. - - Provides an interface to a backend, - see e.g. - :class:`audbackend.Unversioned` - and - :class:`audbackend.Versioned`. - - Derive from this class to - create a new interface. 
- - Args: - backend: backend object - - """ - - def __init__( - self, - backend: Backend, - ): - self._backend = backend - - def __repr__(self) -> str: # noqa: D105 - name = self.__class__.__name__ - return f"audbackend.interface.{name}({self._backend})" - - @property - def backend(self) -> Backend: - r"""Backend object. - - Returns: - backend object - - .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> interface = Base(backend) - - Examples: - >>> interface.backend - audbackend.backend.FileSystem('host', 'repo') - - """ - return self._backend - - @property - def host(self) -> str: - r"""Host path. - - Returns: host path - - .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> interface = Base(backend) - - Examples: - >>> interface.host - 'host' - - """ - return self.backend.host - - def join( - self, - path: str, - *paths, - ) -> str: - r"""Join to path on backend. - - Args: - path: first part of path - *paths: additional parts of path - - Returns: - path joined by :attr:`Backend.sep` - - Raises: - ValueError: if ``path`` contains invalid character - or does not start with ``'/'``, - or if joined path contains invalid character - - .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> interface = Base(backend) - - Examples: - >>> interface.join("/", "file.txt") - '/file.txt' - >>> interface.join("/sub", "file.txt") - '/sub/file.txt' - >>> interface.join("//sub//", "/", "", None, "/file.txt") - '/sub/file.txt' - - """ - return self.backend.join(path, *paths) - - @property - def repository(self) -> str: - r"""Repository name. - - Returns: - repository name - - .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> interface = Base(backend) - - Examples: - >>> interface.repository - 'repo' - - """ - return self.backend.repository - - @property - def sep(self) -> str: - r"""File separator on backend. - - Returns: - file separator - - .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> interface = Base(backend) - - Examples: - >>> interface.sep - '/' - - """ - return self.backend.sep - - def split( - self, - path: str, - ) -> typing.Tuple[str, str]: - r"""Split path on backend into sub-path and basename. - - Args: - path: path containing :attr:`Backend.sep` as separator - - Returns: - tuple containing (root, basename) - - Raises: - ValueError: if ``path`` does not start with ``'/'`` or - does not match ``'[A-Za-z0-9/._-]+'`` - - .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> interface = Base(backend) - - Examples: - >>> interface.split("/") - ('/', '') - >>> interface.split("/file.txt") - ('/', 'file.txt') - >>> interface.split("/sub/") - ('/sub/', '') - >>> interface.split("/sub//file.txt") - ('/sub/', 'file.txt') - - """ - return self.backend.split(path) diff --git a/audbackend/core/interface/unversioned.py b/audbackend/core/interface/unversioned.py deleted file mode 100644 index 58c70d5a..00000000 --- a/audbackend/core/interface/unversioned.py +++ /dev/null @@ -1,706 +0,0 @@ -import os # noqa: F401 -import typing - -from audbackend.core.interface.base import Base - - -class Unversioned(Base): - r"""Interface for unversioned file access. - - Use this interface if you don't care about versioning. - For every backend path exactly one file exists on the backend. - - Args: - backend: backend object - - Examples: - >>> file = "src.txt" - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - >>> interface.put_file(file, "/file.txt") - >>> interface.put_archive(".", "/sub/archive.zip", files=[file]) - >>> interface.ls() - ['/file.txt', '/sub/archive.zip'] - >>> interface.get_file("/file.txt", "dst.txt") - '...dst.txt' - - """ - - def checksum( - self, - path: str, - ) -> str: - r"""MD5 checksum for file on backend. 
- - Args: - path: path to file on backend - - Returns: - MD5 checksum - - Raises: - BackendError: if an error is raised on the backend, - e.g. ``path`` does not exist - ValueError: if ``path`` does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened - - .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> import audeer - >>> audeer.md5(file) - 'd41d8cd98f00b204e9800998ecf8427e' - >>> interface.put_file(file, "/file.txt") - >>> interface.checksum("/file.txt") - 'd41d8cd98f00b204e9800998ecf8427e' - - """ - return self.backend.checksum(path) - - def copy_file( - self, - src_path: str, - dst_path: str, - *, - validate: bool = False, - verbose: bool = False, - ): - r"""Copy file on backend. - - If ``dst_path`` exists - and has a different checksum, - it is overwritten. - Otherwise, - the operation is silently skipped. - - If ``validate`` is set to ``True``, - a final check is performed to assert that - ``src_path`` and ``dst_path`` - have the same checksum. - If it fails, - ``dst_path`` is removed and - an :class:`InterruptedError` is raised. - - Args: - src_path: source path to file on backend - dst_path: destination path to file on backend - validate: verify file was successfully copied - verbose: show debug messages - - Raises: - BackendError: if an error is raised on the backend - InterruptedError: if validation fails - ValueError: if ``src_path`` or ``dst_path`` - does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened - - .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt") - >>> interface.exists("/copy.txt") - False - >>> interface.copy_file("/file.txt", "/copy.txt") - >>> interface.exists("/copy.txt") - True - - """ - self.backend.copy_file( - src_path, - dst_path, - validate=validate, - verbose=verbose, - ) - - def date( - self, - path: str, - ) -> str: - r"""Last modification date of file on backend. - - If the date cannot be determined, - an empty string is returned. - - Args: - path: path to file on backend - - Returns: - date in format ``'yyyy-mm-dd'`` - - Raises: - BackendError: if an error is raised on the backend, - e.g. ``path`` does not exist - ValueError: if ``path`` does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened - - .. - >>> backend = DoctestFileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt") - >>> interface.date("/file.txt") - '1991-02-20' - - """ - return self.backend.date(path) - - def exists( - self, - path: str, - *, - suppress_backend_errors: bool = False, - ) -> bool: - r"""Check if file exists on backend. - - Args: - path: path to file on backend - suppress_backend_errors: if set to ``True``, - silently catch errors raised on the backend - and return ``False`` - - Returns: - ``True`` if file exists - - Raises: - BackendError: if ``suppress_backend_errors`` is ``False`` - and an error is raised on the backend, - e.g. due to a connection timeout - ValueError: if ``path`` does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - ValueError: if ``version`` is empty or - does not match ``'[A-Za-z0-9._-]+'`` - RuntimeError: if backend was not opened - - .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.exists("/file.txt") - False - >>> interface.put_file(file, "/file.txt") - >>> interface.exists("/file.txt") - True - - """ - return self.backend.exists( - path, - suppress_backend_errors=suppress_backend_errors, - ) - - def get_archive( - self, - src_path: str, - dst_root: str, - *, - tmp_root: str = None, - validate: bool = False, - verbose: bool = False, - ) -> typing.List[str]: - r"""Get archive from backend and extract. - - The archive type is derived from the extension of ``src_path``. - See :func:`audeer.extract_archive` for supported extensions. - - If ``dst_root`` does not exist, - it is created. - - If ``validate`` is set to ``True``, - a final check is performed to assert that - ``src_path`` and the retrieved archive - have the same checksum. - If it fails, - the retrieved archive is removed and - an :class:`InterruptedError` is raised. - - Args: - src_path: path to archive on backend - dst_root: local destination directory - tmp_root: directory under which archive is temporarily extracted. - Defaults to temporary directory of system - validate: verify archive was successfully - retrieved from the backend - verbose: show debug messages - - Returns: - extracted files - - Raises: - BackendError: if an error is raised on the backend, - e.g. ``src_path`` does not exist - FileNotFoundError: if ``tmp_root`` does not exist - InterruptedError: if validation fails - NotADirectoryError: if ``dst_root`` is not a directory - PermissionError: if the user lacks write permissions - for ``dst_path`` - RuntimeError: if extension of ``src_path`` is not supported - or ``src_path`` is a malformed archive - ValueError: if ``src_path`` does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened - - .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.put_archive(".", "/sub/archive.zip", files=[file]) - >>> os.remove(file) - >>> interface.get_archive("/sub/archive.zip", ".") - ['src.txt'] - - """ - return self.backend.get_archive( - src_path, - dst_root, - tmp_root=tmp_root, - validate=validate, - verbose=verbose, - ) - - def get_file( - self, - src_path: str, - dst_path: str, - *, - validate: bool = False, - verbose: bool = False, - ) -> str: - r"""Get file from backend. - - If the folder of - ``dst_path`` does not exist, - it is created. - - If ``dst_path`` exists - with a different checksum, - it is overwritten, - Otherwise, - the operation is silently skipped. - - If ``validate`` is set to ``True``, - a final check is performed to assert that - ``src_path`` and ``dst_path`` - have the same checksum. - If it fails, - ``dst_path`` is removed and - an :class:`InterruptedError` is raised. - - Args: - src_path: path to file on backend - dst_path: destination path to local file - validate: verify file was successfully - retrieved from the backend - verbose: show debug messages - - Returns: - full path to local file - - Raises: - BackendError: if an error is raised on the backend, - e.g. ``src_path`` does not exist - InterruptedError: if validation fails - IsADirectoryError: if ``dst_path`` points to an existing folder - PermissionError: if the user lacks write permissions - for ``dst_path`` - ValueError: if ``src_path`` does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened - - .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt") - >>> os.path.exists("dst.txt") - False - >>> _ = interface.get_file("/file.txt", "dst.txt") - >>> os.path.exists("dst.txt") - True - - """ - return self.backend.get_file( - src_path, - dst_path, - validate=validate, - verbose=verbose, - ) - - def ls( - self, - path: str = "/", - *, - pattern: str = None, - suppress_backend_errors: bool = False, - ) -> typing.List[str]: - r"""List files on backend. - - Returns a sorted list of tuples - with path and version. - If a full path - (e.g. ``/sub/file.ext``) - is provided, - all versions of the path are returned. - If a sub-path - (e.g. ``/sub/``) - is provided, - all files that start with - the sub-path are returned. - When ``path`` is set to ``'/'`` - a (possibly empty) list with - all files on the backend is returned. - - Args: - path: path or sub-path - (if it ends with ``'/'``) - on backend - pattern: if not ``None``, - return only files matching the pattern string, - see :func:`fnmatch.fnmatch` - suppress_backend_errors: if set to ``True``, - silently catch errors raised on the backend - and return an empty list - - Returns: - list of tuples (path, version) - - Raises: - BackendError: if ``suppress_backend_errors`` is ``False`` - and an error is raised on the backend, - e.g. ``path`` does not exist - ValueError: if ``path`` does not start with ``'/'`` or - does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened - - .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt") - >>> interface.put_archive(".", "/sub/archive.zip", files=[file]) - >>> interface.ls() - ['/file.txt', '/sub/archive.zip'] - >>> interface.ls("/file.txt") - ['/file.txt'] - >>> interface.ls(pattern="*.txt") - ['/file.txt'] - >>> interface.ls(pattern="archive.*") - ['/sub/archive.zip'] - >>> interface.ls("/sub/") - ['/sub/archive.zip'] - - """ # noqa: E501 - return self.backend.ls( - path, - pattern=pattern, - suppress_backend_errors=suppress_backend_errors, - ) - - def move_file( - self, - src_path: str, - dst_path: str, - *, - validate: bool = False, - verbose: bool = False, - ): - r"""Move file on backend. - - If ``dst_path`` exists - and has a different checksum, - it is overwritten. - Otherwise, - ``src_path`` - is removed and the operation silently skipped. - - If ``validate`` is set to ``True``, - a final check is performed to assert that - ``src_path`` and ``dst_path`` - have the same checksum. - If it fails, - ``dst_path`` is removed and - an :class:`InterruptedError` is raised. - To ensure ``src_path`` still exists in this case - it is first copied and only removed - when the check has successfully passed. - - Args: - src_path: source path to file on backend - dst_path: destination path to file on backend - validate: verify file was successfully moved - verbose: show debug messages - - Raises: - BackendError: if an error is raised on the backend - InterruptedError: if validation fails - ValueError: if ``src_path`` or ``dst_path`` - does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened - - .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt") - >>> interface.exists("/move.txt") - False - >>> interface.move_file("/file.txt", "/move.txt") - >>> interface.exists("/move.txt") - True - >>> interface.exists("/file.txt") - False - - """ - self.backend.move_file( - src_path, - dst_path, - validate=validate, - verbose=verbose, - ) - - def owner( - self, - path: str, - ) -> str: - r"""Owner of file on backend. - - If the owner of the file - cannot be determined, - an empty string is returned. - - Args: - path: path to file on backend - - Returns: - owner - - Raises: - BackendError: if an error is raised on the backend, - e.g. ``path`` does not exist - ValueError: if ``path`` does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened - - .. - >>> backend = DoctestFileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt") - >>> interface.owner("/file.txt") - 'doctest' - - """ - return self.backend.owner(path) - - def put_archive( - self, - src_root: str, - dst_path: str, - *, - files: typing.Union[str, typing.Sequence[str]] = None, - tmp_root: str = None, - validate: bool = False, - verbose: bool = False, - ): - r"""Create archive and put on backend. - - The archive type is derived from the extension of ``dst_path``. - See :func:`audeer.create_archive` for supported extensions. - - The operation is silently skipped, - if an archive with the same checksum - already exists on the backend. - - If ``validate`` is set to ``True``, - a final check is performed to assert that - the local archive and ``dst_path`` - have the same checksum. - If it fails, - ``dst_path`` is removed and - an :class:`InterruptedError` is raised. 
- - Args: - src_root: local root directory where files are located. - By default, - all files below ``src_root`` - will be included into the archive. - Use ``files`` to select specific files - dst_path: path to archive on backend - files: file(s) to include into the archive. - Must exist within ``src_root`` - tmp_root: directory under which archive is temporarily created. - Defaults to temporary directory of system - validate: verify archive was successfully - put on the backend - verbose: show debug messages - - Raises: - BackendError: if an error is raised on the backend - FileNotFoundError: if ``src_root``, - ``tmp_root``, - or one or more ``files`` do not exist - InterruptedError: if validation fails - NotADirectoryError: if ``src_root`` is not a folder - RuntimeError: if ``dst_path`` does not end with - ``zip`` or ``tar.gz`` - or a file in ``files`` is not below ``root`` - ValueError: if ``dst_path`` does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened - - .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.exists("/sub/archive.tar.gz") - False - >>> interface.put_archive(".", "/sub/archive.tar.gz") - >>> interface.exists("/sub/archive.tar.gz") - True - - """ - self.backend.put_archive( - src_root, - dst_path, - files=files, - tmp_root=tmp_root, - validate=validate, - verbose=verbose, - ) - - def put_file( - self, - src_path: str, - dst_path: str, - *, - validate: bool = False, - verbose: bool = False, - ): - r"""Put file on backend. - - The operation is silently skipped, - if a file with the same checksum - already exists on the backend. - - If ``validate`` is set to ``True``, - a final check is performed to assert that - ``src_path`` and ``dst_path`` - have the same checksum. 
- If it fails, - ``dst_path`` is removed and - an :class:`InterruptedError` is raised. - - Args: - src_path: path to local file - dst_path: path to file on backend - validate: verify file was successfully - put on the backend - verbose: show debug messages - - Raises: - BackendError: if an error is raised on the backend - FileNotFoundError: if ``src_path`` does not exist - InterruptedError: if validation fails - IsADirectoryError: if ``src_path`` is a folder - ValueError: if ``dst_path`` does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - RuntimeError: if backend was not opened - - .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.exists("/file.txt") - False - >>> interface.put_file(file, "/file.txt") - >>> interface.exists("/file.txt") - True - - """ - self.backend.put_file( - src_path, - dst_path, - validate=validate, - verbose=verbose, - ) - - def remove_file( - self, - path: str, - ): - r"""Remove file from backend. - - Args: - path: path to file on backend - - Raises: - BackendError: if an error is raised on the backend, - e.g. ``path`` does not exist - ValueError: if ``path`` does not start with ``'/'``, - ends on ``'/'``, - or does not match ``'[A-Za-z0-9/._-]+'`` - - .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Unversioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt") - >>> interface.exists("/file.txt") - True - >>> interface.remove_file("/file.txt") - >>> interface.exists("/file.txt") - False - - """ - self.backend.remove_file(path) diff --git a/audbackend/core/interface/maven.py b/audbackend/core/maven.py similarity index 77% rename from audbackend/core/interface/maven.py rename to audbackend/core/maven.py index aba8529e..6535088c 100644 --- a/audbackend/core/interface/maven.py +++ b/audbackend/core/maven.py @@ -3,18 +3,19 @@ import re import typing +import fsspec + import audeer from audbackend.core import utils -from audbackend.core.backend.base import Base as Backend from audbackend.core.errors import BackendError -from audbackend.core.interface.versioned import Versioned +from audbackend.core.versioned import Versioned class Maven(Versioned): - r"""Interface for Maven style versioned file access. + r"""Backend for Maven style versioned file access. - Use this interface, + Use this backend, if you want to version files similar to how it is handled by Maven. For each file on the backend path @@ -65,27 +66,25 @@ class Maven(Versioned): Examples: >>> file = "src.txt" - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Maven(backend) - >>> interface.put_archive(".", "/sub/archive.zip", "1.0.0", files=[file]) + >>> backend = Maven(filesystem) + >>> backend.put_archive(".", "/sub/archive.zip", "1.0.0", files=[file]) >>> for version in ["1.0.0", "2.0.0"]: - ... interface.put_file(file, "/file.txt", version) - >>> interface.ls() + ... 
backend.put_file(file, "/file.txt", version) + >>> backend.ls() [('/file.txt', '1.0.0'), ('/file.txt', '2.0.0'), ('/sub/archive.zip', '1.0.0')] - >>> interface.get_file("/file.txt", "dst.txt", "2.0.0") + >>> backend.get_file("/file.txt", "dst.txt", "2.0.0") '...dst.txt' """ # noqa: E501 def __init__( self, - backend: Backend, + fs: fsspec.AbstractFileSystem, *, extensions: typing.Sequence[str] = [], regex: bool = False, ): - super().__init__(backend) + super().__init__(fs) self.extensions = extensions self.regex = regex @@ -139,34 +138,29 @@ def ls( RuntimeError: if backend was not opened .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Maven(backend) + >>> backend = Maven(filesystem) Examples: >>> file = "src.txt" - >>> interface.put_archive(".", "/sub/archive.zip", "1.0.0", files=[file]) + >>> backend.put_archive(".", "/sub/archive.zip", "1.0.0", files=[file]) >>> for version in ["1.0.0", "2.0.0"]: - ... interface.put_file(file, "/file.txt", version) - >>> interface.ls() + ... 
backend.put_file(file, "/file.txt", version) + >>> backend.ls() [('/file.txt', '1.0.0'), ('/file.txt', '2.0.0'), ('/sub/archive.zip', '1.0.0')] - >>> interface.ls(latest_version=True) + >>> backend.ls(latest_version=True) [('/file.txt', '2.0.0'), ('/sub/archive.zip', '1.0.0')] - >>> interface.ls("/file.txt") + >>> backend.ls("/file.txt") [('/file.txt', '1.0.0'), ('/file.txt', '2.0.0')] - >>> interface.ls(pattern="*.txt") + >>> backend.ls(pattern="*.txt") [('/file.txt', '1.0.0'), ('/file.txt', '2.0.0')] - >>> interface.ls(pattern="archive.*") + >>> backend.ls(pattern="archive.*") [('/sub/archive.zip', '1.0.0')] - >>> interface.ls("/sub/") + >>> backend.ls("/sub/") [('/sub/archive.zip', '1.0.0')] """ # noqa: E501 if path.endswith("/"): # find files under sub-path - paths = self.backend.ls( - path, - suppress_backend_errors=suppress_backend_errors, - ) + paths = self._ls(path, suppress_backend_errors) # Files are also stored as sub-folder, # e.g. `.../<name>/<version>/<name>-<version><ext>`, # so we need to skip those @@ -184,10 +178,7 @@ def ls( # for available versions. # It will return entries in the form # `<root>/<name>/<version>/<name>-<version><ext>` - paths = self.backend.ls( - self.backend.join(root, name, self.sep), - suppress_backend_errors=suppress_backend_errors, - ) + paths = self._ls(self.join(root, name, self.sep), suppress_backend_errors) # filter for '<root>/<name>/<version>/<name>-x.x.x<ext>' depth = root.count("/") + 2 @@ -247,6 +238,61 @@ def ls( return paths_and_versions + def path( + self, + path: str, + version: str, + *, + allow_sub_path: bool = False, + ) -> str: + r"""Resolved backend path. + + Resolved path as handed to the filesystem object. + + <root>/<base><ext> + -> + <root>/<base>/<version>/<base>-<version><ext> + + Args: + path: path on backend + version: version string + allow_sub_path: if ``path`` is allowed + to point to a sub-path + instead of a file + + Returns: + path as handed to the filesystem object + + Raises: + ValueError: if ``path`` does not start with ``'/'``, + ends on ``'/'`` when ``allow_sub_path`` is ``False``, + or does not match ``'[A-Za-z0-9/._-]+'`` + + .. 
+ >>> backend = Maven(filesystem) + + Examples: + >>> backend.path("/file.txt", "1.0.0") + '/file/1.0.0/file-1.0.0.txt' + + """ + path = self._path(path, allow_sub_path) + + # Assert version is not empty and does not contain invalid characters. + version_allowed_chars = "[A-Za-z0-9._-]+" + if not version: + raise ValueError("Version must not be empty.") + if re.compile(version_allowed_chars).fullmatch(version) is None: + raise ValueError( + f"Invalid version '{version}', " + f"does not match '{version_allowed_chars}'." + ) + + root, name = self.split(path) + base, ext = self._split_ext(name) + path = self.join(root, base, version, f"{base}-{version}{ext}") + return path + def _split_ext( self, name: str, @@ -274,21 +320,3 @@ def _split_ext( ext = f".{ext}" return base, ext - - def _path_with_version( - self, - path: str, - version: str, - ) -> str: - r"""Convert to versioned path. - - <root>/<base><ext> - -> - <root>/<base>/<version>/<base>-<version><ext> - - """ - version = utils.check_version(version) - root, name = self.split(path) - base, ext = self._split_ext(name) - path = self.join(root, base, version, f"{base}-{version}{ext}") - return path diff --git a/audbackend/core/repository.py b/audbackend/core/repository.py deleted file mode 100644 index 27725653..00000000 --- a/audbackend/core/repository.py +++ /dev/null @@ -1,52 +0,0 @@ -import warnings - - -class Repository: - r"""Repository object. - - It stores all information - needed to address a repository: - the repository name, - host, - and backend. - - .. Warning:: - - ``audbackend.Repository`` is deprecated - and will be removed in version 2.2.0. - If an application requires - repository objects, - that assign string names to backends, - they should be provided by the application. 
- - Args: - name: repository name - host: repository host - backend: repository backend - - """ - - def __init__( - self, - name: str, - host: str, - backend: str, - ): - self.name = name - r"""Repository name.""" - self.host = host - r"""Repository host.""" - self.backend = backend - r"""Repository backend.""" - - message = "Repository is deprecated and will be removed with version 2.2.0." - warnings.warn(message, category=UserWarning, stacklevel=2) - - def __repr__(self): # noqa: D105 - return ( - f"Repository(" - f"'{self.name}', " - f"'{self.host}', " - f"'{self.backend}'" - f")" - ) diff --git a/audbackend/core/unversioned.py b/audbackend/core/unversioned.py new file mode 100644 index 00000000..f28e9b9a --- /dev/null +++ b/audbackend/core/unversioned.py @@ -0,0 +1,494 @@ +import typing + +from audbackend.core.base import AbstractBackend + + +class Unversioned(AbstractBackend): + r"""Backend for unversioned file access. + + Use this backend if you don't care about versioning. + For every backend path exactly one file exists on the backend. + + Args: + backend: backend object + + Examples: + >>> file = "src.txt" + >>> backend = Unversioned(filesystem) + >>> backend.put_file(file, "/file.txt") + >>> backend.put_archive(".", "/sub/archive.zip", files=[file]) + >>> backend.ls() + ['/file.txt', '/sub/archive.zip'] + >>> backend.get_file("/file.txt", "dst.txt") + '...dst.txt' + + """ + + def checksum( + self, + path: str, + ) -> str: + r"""MD5 checksum for file on backend. + + Args: + path: path to file on backend + + Returns: + MD5 checksum + + Raises: + BackendError: if an error is raised on the backend, + e.g. ``path`` does not exist + ValueError: if ``path`` does not start with ``'/'``, + ends on ``'/'``, + or does not match ``'[A-Za-z0-9/._-]+'`` + RuntimeError: if backend was not opened + + .. 
+ >>> backend = Unversioned(filesystem) + + Examples: + >>> file = "src.txt" + >>> import audeer + >>> audeer.md5(file) + 'd41d8cd98f00b204e9800998ecf8427e' + >>> backend.put_file(file, "/file.txt") + >>> backend.checksum("/file.txt") + 'd41d8cd98f00b204e9800998ecf8427e' + + """ + path = self.path(path) + return self._checksum(path) + + def copy_file( + self, + src_path: str, + dst_path: str, + *, + validate: bool = False, + verbose: bool = False, + ): + r"""Copy file on backend. + + If ``dst_path`` exists + and has a different checksum, + it is overwritten. + Otherwise, + the operation is silently skipped. + + If ``validate`` is set to ``True``, + a final check is performed to assert that + ``src_path`` and ``dst_path`` + have the same checksum. + If it fails, + ``dst_path`` is removed and + an :class:`InterruptedError` is raised. + + Args: + src_path: source path to file on backend + dst_path: destination path to file on backend + validate: verify file was successfully copied + verbose: show debug messages + + Raises: + BackendError: if an error is raised on the backend + InterruptedError: if validation fails + ValueError: if ``src_path`` or ``dst_path`` + does not start with ``'/'``, + ends on ``'/'``, + or does not match ``'[A-Za-z0-9/._-]+'`` + RuntimeError: if backend was not opened + + .. + >>> backend = Unversioned(filesystem) + + Examples: + >>> file = "src.txt" + >>> backend.put_file(file, "/file.txt") + >>> backend.exists("/copy.txt") + False + >>> backend.copy_file("/file.txt", "/copy.txt") + >>> backend.exists("/copy.txt") + True + + """ + src_path = self.path(src_path) + dst_path = self.path(dst_path) + self._copy_file(src_path, dst_path, validate, verbose) + + def date(self, path: str) -> str: + r"""Last modification date of file on backend. + + If the date cannot be determined, + an empty string is returned. 
+ + Args: + path: path to file on backend + + Returns: + date in format ``'yyyy-mm-dd'`` + + Raises: + BackendError: if an error is raised on the backend, + e.g. ``path`` does not exist + ValueError: if ``path`` does not start with ``'/'``, + ends on ``'/'``, + or does not match ``'[A-Za-z0-9/._-]+'`` + RuntimeError: if backend was not opened + + .. + >>> backend = Unversioned(filesystem) + >>> backend._date = mock_date + + Examples: + >>> file = "src.txt" + >>> backend.put_file(file, "/file.txt") + >>> backend.date("/file.txt") + '1991-02-20' + + """ + path = self.path(path) + return self._date(path) + + def exists( + self, + path: str, + *, + suppress_backend_errors: bool = False, + ) -> bool: + r"""Check if file exists on backend. + + Args: + path: path to file on backend + suppress_backend_errors: if set to ``True``, + silently catch errors raised on the backend + and return ``False`` + + Returns: + ``True`` if file exists + + Raises: + BackendError: if ``suppress_backend_errors`` is ``False`` + and an error is raised on the backend, + e.g. due to a connection timeout + ValueError: if ``path`` does not start with ``'/'``, + ends on ``'/'``, + or does not match ``'[A-Za-z0-9/._-]+'`` + ValueError: if ``version`` is empty or + does not match ``'[A-Za-z0-9._-]+'`` + RuntimeError: if backend was not opened + + .. + >>> backend = Unversioned(filesystem) + + Examples: + >>> file = "src.txt" + >>> backend.exists("/file.txt") + False + >>> backend.put_file(file, "/file.txt") + >>> backend.exists("/file.txt") + True + + """ + path = self.path(path) + return self._exists(path, suppress_backend_errors) + + def get_file( + self, + src_path: str, + dst_path: str, + *, + validate: bool = False, + verbose: bool = False, + ) -> str: + r"""Get file from backend. + + If the folder of + ``dst_path`` does not exist, + it is created. + + If ``dst_path`` exists + with a different checksum, + it is overwritten, + Otherwise, + the operation is silently skipped. 
+ + If ``validate`` is set to ``True``, + a final check is performed to assert that + ``src_path`` and ``dst_path`` + have the same checksum. + If it fails, + ``dst_path`` is removed and + an :class:`InterruptedError` is raised. + + Args: + src_path: path to file on backend + dst_path: destination path to local file + validate: verify file was successfully + retrieved from the backend + verbose: show debug messages + + Returns: + full path to local file + + Raises: + BackendError: if an error is raised on the backend, + e.g. ``src_path`` does not exist + InterruptedError: if validation fails + IsADirectoryError: if ``dst_path`` points to an existing folder + PermissionError: if the user lacks write permissions + for ``dst_path`` + ValueError: if ``src_path`` does not start with ``'/'``, + ends on ``'/'``, + or does not match ``'[A-Za-z0-9/._-]+'`` + RuntimeError: if backend was not opened + + .. + >>> import os + >>> backend = Unversioned(filesystem) + + Examples: + >>> file = "src.txt" + >>> backend.put_file(file, "/file.txt") + >>> os.path.exists("dst.txt") + False + >>> backend.get_file("/file.txt", "dst.txt") + '...dst.txt' + + """ + src_path = self.path(src_path) + return self._get_file(src_path, dst_path, validate, verbose) + + def ls( + self, + path: str = "/", + *, + pattern: str = None, + suppress_backend_errors: bool = False, + ) -> typing.List[str]: + r"""List files on backend. + + If a sub-path + (e.g. ``/sub/``) + is provided, + all files that start with + the sub-path are returned. + When ``path`` is set to ``'/'`` + a (possibly empty) list with + all files on the backend is returned. 
+ + Args: + path: path or sub-path + (if it ends with ``'/'``) + on backend + pattern: if not ``None``, + return only files matching the pattern string, + see :func:`fnmatch.fnmatch` + suppress_backend_errors: if set to ``True``, + silently catch errors raised on the backend + and return an empty list + + Returns: + list of files + + Raises: + BackendError: if ``suppress_backend_errors`` is ``False`` + and an error is raised on the backend, + e.g. ``path`` does not exist + ValueError: if ``path`` does not start with ``'/'`` or + does not match ``'[A-Za-z0-9/._-]+'`` + RuntimeError: if backend was not opened + + .. + >>> backend = Unversioned(filesystem) + + Examples: + >>> file = "src.txt" + >>> backend.put_file(file, "/file.txt") + >>> backend.put_archive(".", "/sub/archive.zip", files=[file]) + >>> backend.ls() + ['/file.txt', '/sub/archive.zip'] + >>> backend.ls("/file.txt") + ['/file.txt'] + >>> backend.ls(pattern="*.txt") + ['/file.txt'] + >>> backend.ls(pattern="archive.*") + ['/sub/archive.zip'] + >>> backend.ls("/sub/") + ['/sub/archive.zip'] + + """ # noqa: E501 + path = self.path(path, allow_sub_path=True) + return self._ls(path, suppress_backend_errors, pattern) + + def move_file( + self, + src_path: str, + dst_path: str, + *, + validate: bool = False, + verbose: bool = False, + ): + r"""Move file on backend. + + If ``dst_path`` exists + and has a different checksum, + it is overwritten. + Otherwise, + ``src_path`` + is removed and the operation silently skipped. + + If ``validate`` is set to ``True``, + a final check is performed to assert that + ``src_path`` and ``dst_path`` + have the same checksum. + If it fails, + ``dst_path`` is removed and + an :class:`InterruptedError` is raised. + To ensure ``src_path`` still exists in this case + it is first copied and only removed + when the check has successfully passed. 
+ + Args: + src_path: source path to file on backend + dst_path: destination path to file on backend + validate: verify file was successfully moved + verbose: show debug messages + + Raises: + BackendError: if an error is raised on the backend + InterruptedError: if validation fails + ValueError: if ``src_path`` or ``dst_path`` + does not start with ``'/'``, + ends on ``'/'``, + or does not match ``'[A-Za-z0-9/._-]+'`` + RuntimeError: if backend was not opened + + .. + >>> backend = Unversioned(filesystem) + + Examples: + >>> file = "src.txt" + >>> backend.put_file(file, "/file.txt") + >>> backend.exists("/move.txt") + False + >>> backend.move_file("/file.txt", "/move.txt") + >>> backend.exists("/move.txt") + True + >>> backend.exists("/file.txt") + False + + """ + src_path = self.path(src_path) + dst_path = self.path(dst_path) + self._move_file(src_path, dst_path, validate, verbose) + + def path( + self, + path: str, + *, + allow_sub_path: bool = False, + ) -> str: + r"""Resolved backend path. + + Resolved path as handed to the filesystem object. + + Args: + path: path on backend + allow_sub_path: if ``path`` is allowed + to point to a sub-path + instead of a file + + Returns: + path as handed to the filesystem object + + Raises: + ValueError: if ``path`` does not start with ``'/'``, + ends on ``'/'`` when ``allow_sub_path`` is ``False``, + or does not match ``'[A-Za-z0-9/._-]+'`` + + """ + return self._path(path, allow_sub_path) + + def put_file( + self, + src_path: str, + dst_path: str, + *, + validate: bool = False, + verbose: bool = False, + ): + r"""Put file on backend. + + The operation is silently skipped, + if a file with the same checksum + already exists on the backend. + + If ``validate`` is set to ``True``, + a final check is performed to assert that + ``src_path`` and ``dst_path`` + have the same checksum. + If it fails, + ``dst_path`` is removed and + an :class:`InterruptedError` is raised. 
+ + Args: + src_path: path to local file + dst_path: path to file on backend + validate: verify file was successfully + put on the backend + verbose: show debug messages + + Raises: + BackendError: if an error is raised on the backend + FileNotFoundError: if ``src_path`` does not exist + InterruptedError: if validation fails + IsADirectoryError: if ``src_path`` is a folder + ValueError: if ``dst_path`` does not start with ``'/'``, + ends on ``'/'``, + or does not match ``'[A-Za-z0-9/._-]+'`` + RuntimeError: if backend was not opened + + .. + >>> backend = Unversioned(filesystem) + + Examples: + >>> file = "src.txt" + >>> backend.exists("/file.txt") + False + >>> backend.put_file(file, "/file.txt") + >>> backend.exists("/file.txt") + True + + """ + dst_path = self.path(dst_path) + self._put_file(src_path, dst_path, validate, verbose) + + def remove_file(self, path: str): + r"""Remove file from backend. + + Args: + path: path to file on backend + + Raises: + BackendError: if an error is raised on the backend, + e.g. ``path`` does not exist + ValueError: if ``path`` does not start with ``'/'``, + ends on ``'/'``, + or does not match ``'[A-Za-z0-9/._-]+'`` + + .. 
+ >>> backend = Unversioned(filesystem) + + Examples: + >>> file = "src.txt" + >>> backend.put_file(file, "/file.txt") + >>> backend.exists("/file.txt") + True + >>> backend.remove_file("/file.txt") + >>> backend.exists("/file.txt") + False + + """ + path = self.path(path) + self._remove_file(path) diff --git a/audbackend/core/utils.py b/audbackend/core/utils.py index 9feaa0d3..0be3a439 100644 --- a/audbackend/core/utils.py +++ b/audbackend/core/utils.py @@ -1,21 +1,11 @@ import datetime import errno import os -import re import typing from audbackend.core.errors import BackendError -BACKEND_ALLOWED_CHARS = "[A-Za-z0-9/._-]+" -BACKEND_ALLOWED_CHARS_COMPILED = re.compile(BACKEND_ALLOWED_CHARS) - -BACKEND_SEPARATOR = "/" - -VERSION_ALLOWED_CHARS = BACKEND_ALLOWED_CHARS.replace(BACKEND_SEPARATOR, "") -VERSION_ALLOWED_CHARS_COMPILED = re.compile(VERSION_ALLOWED_CHARS) - - def call_function_on_backend( function: typing.Callable, *args, @@ -32,84 +22,10 @@ def call_function_on_backend( raise BackendError(ex) -def check_path( - path: str, - *, - allow_sub_path: bool = False, -) -> str: - r"""Check path.""" - # Assert path starts with sep and does not contain invalid characters. - if not path.startswith(BACKEND_SEPARATOR): - raise ValueError( - f"Invalid backend path '{path}', must start with '{BACKEND_SEPARATOR}'." - ) - if not allow_sub_path and path.endswith(BACKEND_SEPARATOR): - raise ValueError( - f"Invalid backend path '{path}', must not end on '{BACKEND_SEPARATOR}'." - ) - if path and BACKEND_ALLOWED_CHARS_COMPILED.fullmatch(path) is None: - raise ValueError( - f"Invalid backend path '{path}', " - f"does not match '{BACKEND_ALLOWED_CHARS}'." 
- ) - - # Remove immediately consecutive seps - is_sub_path = path.endswith(BACKEND_SEPARATOR) - paths = path.split(BACKEND_SEPARATOR) - paths = [path for path in paths if path] - path = BACKEND_SEPARATOR + BACKEND_SEPARATOR.join(paths) - if is_sub_path and not path.endswith(BACKEND_SEPARATOR): - path += BACKEND_SEPARATOR - - return path - - -def check_version(version: str) -> str: - r"""Check version.""" - # Assert version is not empty and does not contain invalid characters. - if not version: - raise ValueError("Version must not be empty.") - if VERSION_ALLOWED_CHARS_COMPILED.fullmatch(version) is None: - raise ValueError( - f"Invalid version '{version}', " - f"does not match '{VERSION_ALLOWED_CHARS}'." - ) - - return version - - def date_format(date: datetime.datetime) -> str: return date.strftime("%Y-%m-%d") -def file_owner(path: str) -> str: - r"""Get file owner.""" - if os.name == "nt": # pragma: no cover - import win32security - - sd = win32security.GetFileSecurity( - path, - win32security.OWNER_SECURITY_INFORMATION, - ) - owner_sid = sd.GetSecurityDescriptorOwner() - owner, _, _ = win32security.LookupAccountSid(None, owner_sid) - - else: # pragma: no Windows cover - import pwd - - owner = pwd.getpwuid(os.stat(path).st_uid).pw_name - - return owner - - -def raise_file_exists_error(path: str): - raise FileExistsError( - errno.EEXIST, - os.strerror(errno.EEXIST), - path, - ) - - def raise_file_not_found_error(path: str): raise FileNotFoundError( errno.ENOENT, diff --git a/audbackend/core/interface/versioned.py b/audbackend/core/versioned.py similarity index 70% rename from audbackend/core/interface/versioned.py rename to audbackend/core/versioned.py index 182563b0..cdd9c86d 100644 --- a/audbackend/core/interface/versioned.py +++ b/audbackend/core/versioned.py @@ -1,48 +1,39 @@ import fnmatch import os +import re import typing import audeer from audbackend.core import utils -from audbackend.core.backend.base import Base as Backend +from audbackend.core.base 
import AbstractBackend from audbackend.core.errors import BackendError -from audbackend.core.interface.base import Base -class Versioned(Base): - r"""Interface for versioned file access. +class Versioned(AbstractBackend): + r"""Backend for versioned file access. - Use this interface if you care about versioning. + Use this backend if you care about versioning. For each file on the backend path one or more versions may exist. Args: backend: backend object - .. Prepare backend and interface for docstring examples + .. Prepare backend for docstring examples Examples: >>> file = "src.txt" - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) - >>> interface.put_archive(".", "/sub/archive.zip", "1.0.0", files=[file]) + >>> backend = Versioned(filesystem) + >>> backend.put_archive(".", "/sub/archive.zip", "1.0.0", files=[file]) >>> for version in ["1.0.0", "2.0.0"]: - ... interface.put_file(file, "/file.txt", version) - >>> interface.ls() + ... backend.put_file(file, "/file.txt", version) + >>> backend.ls() [('/file.txt', '1.0.0'), ('/file.txt', '2.0.0'), ('/sub/archive.zip', '1.0.0')] - >>> interface.get_file("/file.txt", "dst.txt", "2.0.0") + >>> backend.get_file("/file.txt", "dst.txt", "2.0.0") '...dst.txt' - """ - def __init__( - self, - backend: Backend, - ): - super().__init__(backend) - def checksum( self, path: str, @@ -68,22 +59,20 @@ def checksum( RuntimeError: if backend was not opened .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" >>> import audeer >>> audeer.md5(file) 'd41d8cd98f00b204e9800998ecf8427e' - >>> interface.put_file(file, "/file.txt", "1.0.0") - >>> interface.checksum("/file.txt", "1.0.0") + >>> backend.put_file(file, "/file.txt", "1.0.0") + >>> backend.checksum("/file.txt", "1.0.0") 'd41d8cd98f00b204e9800998ecf8427e' """ - path_with_version = self._path_with_version(path, version) - return self.backend.checksum(path_with_version) + path = self.path(path, version) + return self._checksum(path) def copy_file( self, @@ -133,17 +122,15 @@ def copy_file( RuntimeError: if backend was not opened .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt", "1.0.0") - >>> interface.exists("/copy.txt", "1.0.0") + >>> backend.put_file(file, "/file.txt", "1.0.0") + >>> backend.exists("/copy.txt", "1.0.0") False - >>> interface.copy_file("/file.txt", "/copy.txt", version="1.0.0") - >>> interface.exists("/copy.txt", "1.0.0") + >>> backend.copy_file("/file.txt", "/copy.txt", version="1.0.0") + >>> backend.exists("/copy.txt", "1.0.0") True """ @@ -153,13 +140,11 @@ def copy_file( versions = [version] for version in versions: - src_path_with_version = self._path_with_version(src_path, version) - dst_path_with_version = self._path_with_version(dst_path, version) - self.backend.copy_file( - src_path_with_version, - dst_path_with_version, - validate=validate, - verbose=verbose, + self._copy_file( + self.path(src_path, version), + self.path(dst_path, version), + validate, + verbose, ) def date( @@ -190,19 +175,18 @@ def date( RuntimeError: if backend was not opened .. 
- >>> backend = DoctestFileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) + >>> backend._date = mock_date Examples: >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt", "1.0.0") - >>> interface.date("/file.txt", "1.0.0") + >>> backend.put_file(file, "/file.txt", "1.0.0") + >>> backend.date("/file.txt", "1.0.0") '1991-02-20' """ - path_with_version = self._path_with_version(path, version) - return self.backend.date(path_with_version) + path = self.path(path, version) + return self._date(path) def exists( self, @@ -235,24 +219,19 @@ def exists( RuntimeError: if backend was not opened .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.exists("/file.txt", "1.0.0") + >>> backend.exists("/file.txt", "1.0.0") False - >>> interface.put_file(file, "/file.txt", "1.0.0") - >>> interface.exists("/file.txt", "1.0.0") + >>> backend.put_file(file, "/file.txt", "1.0.0") + >>> backend.exists("/file.txt", "1.0.0") True """ - path_with_version = self._path_with_version(path, version) - return self.backend.exists( - path_with_version, - suppress_backend_errors=suppress_backend_errors, - ) + path = self.path(path, version) + return self._exists(path, suppress_backend_errors) def get_archive( self, @@ -311,26 +290,18 @@ def get_archive( RuntimeError: if backend was not opened .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.put_archive(".", "/sub/archive.zip", "1.0.0", files=[file]) + >>> backend.put_archive(".", "/sub/archive.zip", "1.0.0", files=[file]) >>> os.remove(file) - >>> interface.get_archive("/sub/archive.zip", ".", "1.0.0") + >>> backend.get_archive("/sub/archive.zip", ".", "1.0.0") ['src.txt'] """ - src_path_with_version = self._path_with_version(src_path, version) - return self.backend.get_archive( - src_path_with_version, - dst_root, - tmp_root=tmp_root, - validate=validate, - verbose=verbose, - ) + src_path = self.path(src_path, version) + return self._get_archive(src_path, dst_root, tmp_root, validate, verbose) def get_file( self, @@ -387,27 +358,19 @@ def get_file( RuntimeError: if backend was not opened .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt", "1.0.0") + >>> backend.put_file(file, "/file.txt", "1.0.0") >>> os.path.exists("dst.txt") False - >>> _ = interface.get_file("/file.txt", "dst.txt", "1.0.0") - >>> os.path.exists("dst.txt") - True + >>> backend.get_file("/file.txt", "dst.txt", "1.0.0") + '...dst.txt' """ - src_path_with_version = self._path_with_version(src_path, version) - return self.backend.get_file( - src_path_with_version, - dst_path, - validate=validate, - verbose=verbose, - ) + src_path = self.path(src_path, version) + return self._get_file(src_path, dst_path, validate, verbose) def latest_version( self, @@ -430,15 +393,13 @@ def latest_version( RuntimeError: if backend was not opened .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt", "1.0.0") - >>> interface.put_file(file, "/file.txt", "2.0.0") - >>> interface.latest_version("/file.txt") + >>> backend.put_file(file, "/file.txt", "1.0.0") + >>> backend.put_file(file, "/file.txt", "2.0.0") + >>> backend.latest_version("/file.txt") '2.0.0' """ @@ -495,42 +456,34 @@ def ls( RuntimeError: if backend was not opened .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt", "1.0.0") - >>> interface.put_file(file, "/file.txt", "2.0.0") - >>> interface.put_archive(".", "/sub/archive.zip", "1.0.0", files=[file]) - >>> interface.ls() + >>> backend.put_file(file, "/file.txt", "1.0.0") + >>> backend.put_file(file, "/file.txt", "2.0.0") + >>> backend.put_archive(".", "/sub/archive.zip", "1.0.0", files=[file]) + >>> backend.ls() [('/file.txt', '1.0.0'), ('/file.txt', '2.0.0'), ('/sub/archive.zip', '1.0.0')] - >>> interface.ls(latest_version=True) + >>> backend.ls(latest_version=True) [('/file.txt', '2.0.0'), ('/sub/archive.zip', '1.0.0')] - >>> interface.ls("/file.txt") + >>> backend.ls("/file.txt") [('/file.txt', '1.0.0'), ('/file.txt', '2.0.0')] - >>> interface.ls(pattern="*.txt") + >>> backend.ls(pattern="*.txt") [('/file.txt', '1.0.0'), ('/file.txt', '2.0.0')] - >>> interface.ls(pattern="archive.*") + >>> backend.ls(pattern="archive.*") [('/sub/archive.zip', '1.0.0')] - >>> interface.ls("/sub/") + >>> backend.ls("/sub/") [('/sub/archive.zip', '1.0.0')] """ # noqa: E501 if path.endswith("/"): # find files under sub-path - paths = self.backend.ls( - path, - suppress_backend_errors=suppress_backend_errors, - ) + paths = self._ls(path, 
suppress_backend_errors) else: # find versions of path root, file = self.split(path) - paths = self.backend.ls( - root, - suppress_backend_errors=suppress_backend_errors, - ) + paths = self._ls(root, suppress_backend_errors) # filter for '/root/version/file' depth = root.count("/") + 1 @@ -637,19 +590,17 @@ def move_file( RuntimeError: if backend was not opened .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt", "1.0.0") - >>> interface.exists("/move.txt", "1.0.0") + >>> backend.put_file(file, "/file.txt", "1.0.0") + >>> backend.exists("/move.txt", "1.0.0") False - >>> interface.move_file("/file.txt", "/move.txt", version="1.0.0") - >>> interface.exists("/move.txt", "1.0.0") + >>> backend.move_file("/file.txt", "/move.txt", version="1.0.0") + >>> backend.exists("/move.txt", "1.0.0") True - >>> interface.exists("/file.txt", "1.0.0") + >>> backend.exists("/file.txt", "1.0.0") False """ @@ -659,57 +610,59 @@ def move_file( versions = [version] for version in versions: - src_path_with_version = self._path_with_version(src_path, version) - dst_path_with_version = self._path_with_version(dst_path, version) - self.backend.move_file( - src_path_with_version, - dst_path_with_version, - validate=validate, - verbose=verbose, + self._move_file( + self.path(src_path, version), + self.path(dst_path, version), + validate, + verbose, ) - def owner( + def path( self, path: str, version: str, + *, + allow_sub_path: bool = False, ) -> str: - r"""Owner of file on backend. + r"""Resolved backend path. - If the owner of the file - cannot be determined, - an empty string is returned. + Resolved path as handed to the filesystem object. 
+ + <root>/<name> + -> + <root>/<version>/<name> + Args: - path: path to file on backend + path: path on backend version: version string + allow_sub_path: if ``path`` is allowed + to point to a sub-path + instead of a file Returns: - owner + path as handed to the filesystem object Raises: - BackendError: if an error is raised on the backend, - e.g. ``path`` does not exist ValueError: if ``path`` does not start with ``'/'``, - ends on ``'/'``, + ends on ``'/'`` when ``allow_sub_path`` is ``False``, or does not match ``'[A-Za-z0-9/._-]+'`` - ValueError: if ``version`` is empty or - does not match ``'[A-Za-z0-9._-]+'`` - RuntimeError: if backend was not opened - - .. - >>> backend = DoctestFileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) - - Examples: - >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt", "1.0.0") - >>> interface.owner("/file.txt", "1.0.0") - 'doctest' """ - path_with_version = self._path_with_version(path, version) - return self.backend.owner(path_with_version) + path = self._path(path, allow_sub_path) + + # Assert version is not empty and does not contain invalid characters. + version_allowed_chars = "[A-Za-z0-9._-]+" + if not version: + raise ValueError("Version must not be empty.") + if re.compile(version_allowed_chars).fullmatch(version) is None: + raise ValueError( + f"Invalid version '{version}', " + f"does not match '{version_allowed_chars}'." + ) + + root, name = self.split(path) + path = self.join(root, version, name) + return path def put_archive( self, @@ -773,28 +726,19 @@ def put_archive( RuntimeError: if backend was not opened .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.exists("/sub/archive.tar.gz", "1.0.0") + >>> backend.exists("/sub/archive.tar.gz", "1.0.0") False - >>> interface.put_archive(".", "/sub/archive.tar.gz", "1.0.0") - >>> interface.exists("/sub/archive.tar.gz", "1.0.0") + >>> backend.put_archive(".", "/sub/archive.tar.gz", "1.0.0") + >>> backend.exists("/sub/archive.tar.gz", "1.0.0") True """ - dst_path_with_version = self._path_with_version(dst_path, version) - self.backend.put_archive( - src_root, - dst_path_with_version, - files=files, - tmp_root=tmp_root, - validate=validate, - verbose=verbose, - ) + dst_path = self.path(dst_path, version) + self._put_archive(src_root, dst_path, files, tmp_root, validate, verbose) def put_file( self, @@ -843,26 +787,19 @@ def put_file( RuntimeError: if backend was not opened .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.exists("/file.txt", "3.0.0") + >>> backend.exists("/file.txt", "3.0.0") False - >>> interface.put_file(file, "/file.txt", "3.0.0") - >>> interface.exists("/file.txt", "3.0.0") + >>> backend.put_file(file, "/file.txt", "3.0.0") + >>> backend.exists("/file.txt", "3.0.0") True """ - dst_path_with_version = self._path_with_version(dst_path, version) - return self.backend.put_file( - src_path, - dst_path_with_version, - validate=validate, - verbose=verbose, - ) + dst_path = self.path(dst_path, version) + return self._put_file(src_path, dst_path, validate, verbose) def remove_file( self, @@ -886,22 +823,20 @@ def remove_file( RuntimeError: if backend was not opened .. 
- >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt", "1.0.0") - >>> interface.exists("/file.txt", "1.0.0") + >>> backend.put_file(file, "/file.txt", "1.0.0") + >>> backend.exists("/file.txt", "1.0.0") True - >>> interface.remove_file("/file.txt", "1.0.0") - >>> interface.exists("/file.txt", "1.0.0") + >>> backend.remove_file("/file.txt", "1.0.0") + >>> backend.exists("/file.txt", "1.0.0") False """ - path_with_version = self._path_with_version(path, version) - self.backend.remove_file(path_with_version) + path = self.path(path, version) + self._remove_file(path) def versions( self, @@ -930,39 +865,17 @@ def versions( RuntimeError: if backend was not opened .. - >>> backend = audbackend.backend.FileSystem("host", "repo") - >>> backend.open() - >>> interface = Versioned(backend) + >>> backend = Versioned(filesystem) Examples: >>> file = "src.txt" - >>> interface.put_file(file, "/file.txt", "1.0.0") - >>> interface.put_file(file, "/file.txt", "2.0.0") - >>> interface.versions("/file.txt") + >>> backend.put_file(file, "/file.txt", "1.0.0") + >>> backend.put_file(file, "/file.txt", "2.0.0") + >>> backend.versions("/file.txt") ['1.0.0', '2.0.0'] """ - utils.check_path(path) - + path = self._path(path) paths = self.ls(path, suppress_backend_errors=suppress_backend_errors) vs = [v for _, v in paths] - return vs - - def _path_with_version( - self, - path: str, - version: str, - ) -> str: - r"""Convert to versioned path. 
- - / - -> - // - - """ - path = utils.check_path(path) - version = utils.check_version(version) - root, name = self.split(path) - path = self.join(root, version, name) - return path diff --git a/audbackend/interface/__init__.py b/audbackend/interface/__init__.py deleted file mode 100644 index 49b6438e..00000000 --- a/audbackend/interface/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from audbackend.core.interface.base import Base -from audbackend.core.interface.maven import Maven -from audbackend.core.interface.unversioned import Unversioned -from audbackend.core.interface.versioned import Versioned diff --git a/docs/api-src/audbackend.backend.rst b/docs/api-src/audbackend.backend.rst deleted file mode 100644 index 221e81a4..00000000 --- a/docs/api-src/audbackend.backend.rst +++ /dev/null @@ -1,26 +0,0 @@ -.. _backends: - -audbackend.backend -================== - -.. automodule:: audbackend.backend - -Currently the following -backends are supported: - -.. autosummary:: - :toctree: - :nosignatures: - - Artifactory - FileSystem - -Users can implement their own -backend by deriving from -:class:`audbackend.backend.Base`. - -.. autosummary:: - :toctree: - :nosignatures: - - Base diff --git a/docs/api-src/audbackend.interface.rst b/docs/api-src/audbackend.interface.rst deleted file mode 100644 index 4e2710cc..00000000 --- a/docs/api-src/audbackend.interface.rst +++ /dev/null @@ -1,27 +0,0 @@ -.. _interfaces: - -audbackend.interface -==================== - -.. automodule:: audbackend.interface - -To access the files on a backend users -can use the following interfaces: - -.. autosummary:: - :toctree: - :nosignatures: - - Maven - Unversioned - Versioned - -Users can implement their own -interface by deriving from -:class:`audbackend.interface.Base` - -.. 
autosummary:: - :toctree: - :nosignatures: - - Base diff --git a/docs/api-src/audbackend.rst b/docs/api-src/audbackend.rst index 7b500e92..5af5c8cb 100644 --- a/docs/api-src/audbackend.rst +++ b/docs/api-src/audbackend.rst @@ -4,34 +4,17 @@ audbackend .. automodule:: audbackend :mod:`audbackend` -provides an abstract layer +provides an abstract backend layer for managing files -in a repository -on a host. - -This involves two components: - -1. A *backend* that - implements file operations - on a specific storing device - (:mod:`audbackend.backend`). - -2. An *interface* that - passes user requests - to a backend - (:mod:`audbackend.interface`). - -Additionally, -the following classes -and functions are available. +on different file systems +as supported by :mod:`fsspec`. .. autosummary:: :toctree: :nosignatures: + AbstractBackend BackendError - Repository - access - create - delete - register + Maven + Unversioned + Versioned diff --git a/docs/conftest.py b/docs/conftest.py new file mode 100644 index 00000000..6187d85e --- /dev/null +++ b/docs/conftest.py @@ -0,0 +1,43 @@ +import datetime +from doctest import ELLIPSIS +import os + +import pytest +from sybil import Sybil +from sybil.parsers.rest import DocTestParser +from sybil.parsers.rest import PythonCodeBlockParser + +import audbackend + + +@pytest.fixture(scope="function") +def mock_date(): + r"""Custom date method to return a fixed date.""" + + def date(path: str) -> str: + date = datetime.datetime(1991, 2, 20) + date = audbackend.core.utils.date_format(date) + return date + + yield date + + +@pytest.fixture(scope="module", autouse=True) +def prepare_docstring_tests(tmpdir_factory): + r"""Code to be run before each doctest.""" + tmp = tmpdir_factory.mktemp("tmp") + # Change to tmp dir + current_dir = os.getcwd() + os.chdir(tmp) + + yield + + # Change back to current dir + os.chdir(current_dir) + + +pytest_collect_file = Sybil( + parsers=[DocTestParser(optionflags=ELLIPSIS), PythonCodeBlockParser()], + 
pattern="*.rst", + fixtures=["mock_date", "prepare_docstring_tests"], +).pytest() diff --git a/docs/developer-guide.rst b/docs/developer-guide.rst index fbdf6839..73c4be56 100644 --- a/docs/developer-guide.rst +++ b/docs/developer-guide.rst @@ -1,15 +1,3 @@ -.. set temporal working directory -.. jupyter-execute:: - :hide-code: - - import os - import audeer - - _cwd_root = os.getcwd() - _tmp_root = audeer.mkdir(os.path.join("docs", "tmp-developer-guide")) - os.chdir(_tmp_root) - - .. _developer-guide: Developer guide @@ -18,54 +6,62 @@ Developer guide The aim of :mod:`audbackend` is to provide an -abstract interface for -any kind of file storage system. -Even those, -that have not been -invented yet :) - -This tutorial is divided -into two parts. -In :ref:`develop-new-interface`, -we show how to create a custom interface -that manages user content. -Under :ref:`develop-new-backend`, -we take a deep dive -and develop a backend -that stores files into -a SQLite_ database. - - -.. _develop-new-interface: - -Develop new interface ---------------------- - -We can implement our own interface +abstract backend for +any kind of file storage system, +that follows the :mod:`fsspec` specifications. + +This tutorial shows +how to develop a new backend. + + +.. _develop-new-backend: + +Develop new backend +------------------- + +We can implement our own backend by deriving from -:class:`audbackend.interface.Base`. +:class:`audbackend.AbstractBackend`. +Afterwards, +you need to provide implementations +for the following methods: + +* ``checksum()`` +* ``copy_file()`` +* ``date()`` +* ``exists()`` +* ``get_file()`` +* ``ls()`` +* ``move_file()`` +* ``path()`` +* ``put_file()`` +* ``remove_file()`` + + For instance, -we can create an interface +we can create a backend +to manage user content. 
-It provides three functions: +It provides one additional method: * ``add_user()`` to register a user -* ``upload()`` to upload a file for user -* ``ls()`` to list the files of a user We store user information in a database under -``"/user.map"``. +``"/.user.map"`` on the backend. To access and update the database we implement the following helper class. -.. jupyter-execute:: +.. code-block:: python + + import os + import pickle import audbackend - import shelve + import fsspec + class UserDB: r"""User database. @@ -74,569 +70,154 @@ helper class. and write changes back to the backend. """ - def __init__(self, backend: audbackend.backend.Base): - self.backend = backend - - def __enter__(self) -> shelve.Shelf: - if self.backend.exists("/user.db"): - self.backend.get_file("/user.db", "~.db") - self._map = shelve.open("~.db", flag="w", writeback=True) + def __init__(self, fs: fsspec.AbstractFileSystem): + self.backend = audbackend.Unversioned(fs) + self.remote_file = "/.db.pkl" + self.local_file = audeer.path(".db.pkl") + + def __enter__(self) -> dict: + if self.backend.exists(self.remote_file): + self.backend.get_file(self.remote_file, self.local_file) + if os.path.exists(self.local_file): + with open(self.local_file, "rb") as file: + self._map = pickle.load(file) else: - self._map = shelve.open("~.db", writeback=True) + self._map = {} return self._map def __exit__(self, exc_type, exc_val, exc_tb): - self._map.close() - self.backend.put_file("~.db", "/user.db") - os.remove("~.db") + with open(self.local_file, "wb") as file: + pickle.dump(self._map, file, protocol=pickle.HIGHEST_PROTOCOL) + self.backend.put_file(self.local_file, self.remote_file) + os.remove(self.local_file) Now, -we implement the interface. +we implement the backend. -.. jupyter-execute:: +.. 
code-block:: python - class UserContent(audbackend.interface.Base): + class UserContent(audbackend.AbstractBackend): - def add_user(self, username: str, password: str): + def add_user(self, user: str, password: str): r"""Add user to database.""" - with UserDB(self.backend) as map: - map[username] = password - - def upload(self, username: str, password: str, path: str): - r"""Upload user file.""" - with UserDB(self.backend) as map: - if username not in map or map[username] != password: - raise ValueError("User does not exist or wrong password.") - self.backend.put_file(path, f"/{username}/{os.path.basename(path)}") - - def ls(self, username: str) -> list: - r"""List files of user.""" - with UserDB(self.backend) as map: - if username not in map: - return [] - return self.backend.ls(f"/{username}/") - - -Let's create a repository -with our custom interface, -and upload a file: - -.. jupyter-execute:: - - import audeer - - audbackend.backend.FileSystem.create("./host", "repo") - backend = audbackend.backend.FileSystem("./host", "repo") - backend.open() - interface = UserContent(backend) - - interface.add_user("audeering", "pa$$word") - audeer.touch("local.txt") - interface.upload("audeering", "pa$$word", "local.txt") - interface.ls("audeering") + with UserDB(self.fs) as map: + map[user] = password + def checksum(self, path: str, *, user: str, password: str): + path = self.path(path, user=user, password=password) + return self._checksum(path) -At the end we clean up and delete our repo. - -.. jupyter-execute:: - - backend.close() - audbackend.backend.FileSystem.delete("./host", "repo") - - -.. _develop-new-backend: - -Develop new backend -------------------- - -In the previous section -we have used an existing -backend implementation. -Now we develop a new backend -that implements -a SQLite_ database. - -A new backend -should be implemented as a class -deriving from -:class:`audbackend.backend.Base`. 
-As can be seen in the file -:file:`audbackend/core/backend/base.py`, -we need to implement the following private methods: - -* ``_access()`` -* ``_checksum()`` -* ``_close()`` -* ``_create()`` -* ``_date()`` -* ``_delete()`` -* ``_exists()`` -* ``_get_file()`` -* ``_ls()`` -* ``_open()`` -* ``_owner()`` -* ``_put_file()`` -* ``_remove_file()`` - -We call the class ``SQLite``. -and we add two more attributes -in the constructor: - -* ``_path``: the path of the database, - which we derive from the host and repository, - namely ``"//db"``. -* ``_db``: connection object to the database. - -.. jupyter-execute:: - - import audbackend - import os - - class SQLite(audbackend.backend.Base): - - def __init__( - self, - host: str, - repository: str, - ): - super().__init__(host, repository) - self._path = os.path.join(host, repository, "db") - self._db = None - - -Obviously, -this is not yet a fully -functional backend implementation. -But for the sake of clarity, -we will dynamically add -the required methods one after another -using a dedicated decorator: - -.. jupyter-execute:: - - import functools - - def add_method(cls): - def decorator(func): - @functools.wraps(func) - def wrapper(self, *args, **kwargs): - return func(self, *args, **kwargs) - setattr(cls, func.__name__, wrapper) - return func - return decorator - -For instance, -to ensure the connection to the database -is properly closed, -we add a destructor method. -This is not mandatory -and whether it is needed -depends on the backend. - -.. jupyter-execute:: - - @add_method(SQLite) - def __del__(self): - if self._db is not None: - self._db.close() - - -Before we can instantiate an instance, -we implement a method that -creates a new database -(or raises an error if it exists). 
-And add a table ``data`` -that holds the content -and meta information of the files -stored on our backend: - -* ``path``: the (virtual) backend path -* ``checksum``: the checksum -* ``content``: the binary content -* ``date``: the date when the file was added -* ``owner``: the owner of the file - -.. jupyter-execute:: - - import errno - import os - import sqlite3 as sl - - @add_method(SQLite) - def _create( + def copy_file( self, - ): - if os.path.exists(self._path): - raise FileExistsError( - errno.EEXIST, - os.strerror(errno.EEXIST), - self._path, - ) - os.mkdir(os.path.dirname(self._path)) - self._db = sl.connect(self._path) - query = """ - CREATE TABLE data ( - path TEXT NOT NULL, - checksum TEXT NOT NULL, - content BLOB NOT NULL, - date TEXT NOT NULL, - owner TEXT NOT NULL, - PRIMARY KEY (path) - ); - """ - with self._db as db: - db.execute(query) - -Now we create a repository. - -.. jupyter-execute:: - - SQLite.create("./host", "repo") + src_path: str, + dst_path: str, + *, + user: str, + password: str, + validate: bool = False, + verbose: bool = False, + ): + src_path = self.path(src_path, user=user, password=password) + dst_path = self.path(dst_path, user=user, password=password) + self._copy_file(src_path, dst_path, validate, verbose) -Before we can access the repository -we add a method to open -an existing database -(or raise an error -it is not found). + def date(self, path: str, *, user: str, password: str): + path = self.path(path, user=user, password=password) + return self._date(path) -.. jupyter-execute:: + def exists(self, path: str, *, user: str, password: str): + path = self.path(path, user=user, password=password) + return self._exists(path) - @add_method(SQLite) - def _open( - self, - ): - if not os.path.exists(self._path): - raise FileNotFoundError( - errno.ENOENT, - os.strerror(errno.ENOENT), - self._path, - ) - self._db = sl.connect(self._path) - -Now we instantiate an object of our backend -and access the repository we created. 
-We then wrap the object -with the :class:`audbackend.interface.Versioned` interface. - -.. jupyter-execute:: - - backend = SQLite("./host", "repo") - backend.open() - interface = audbackend.interface.Versioned(backend) - -Next, -we implement a method to check -if a file exists. - -.. jupyter-execute:: - - @add_method(SQLite) - def _exists( - self, - path: str, - ) -> bool: - with self._db as db: - query = f""" - SELECT EXISTS ( - SELECT 1 - FROM data - WHERE path="{path}" - ); - """ - result = db.execute(query).fetchone()[0] == 1 - return result - - interface.exists("/file.txt", "1.0.0") - -And a method that uploads -a file to our backend. - -.. jupyter-execute:: - - import datetime - import getpass - - @add_method(SQLite) - def _put_file( - self, - src_path: str, - dst_path: str, - checksum: str, - verbose: bool, - ): - with self._db as db: - with open(src_path, "rb") as file: - content = file.read() - query = """ - INSERT INTO data (path, checksum, content, date, owner) - VALUES (?, ?, ?, ?, ?) - """ - owner = getpass.getuser() - date = datetime.datetime.today().strftime("%Y-%m-%d") - data = (dst_path, checksum, content, date, owner) - db.execute(query, data) - -Let's put a file on the backend. - -.. jupyter-execute:: - - file = audeer.touch("file.txt") - interface.put_file(file, "/file.txt", "1.0.0") - interface.exists("/file.txt", "1.0.0") - -We need three more functions -to access its meta information. - -.. jupyter-execute:: - - @add_method(SQLite) - def _checksum( - self, - path: str, - ) -> str: - with self._db as db: - query = f""" - SELECT checksum - FROM data - WHERE path="{path}" - """ - checksum = db.execute(query).fetchone()[0] - return checksum - - interface.checksum("/file.txt", "1.0.0") - -.. 
jupyter-execute:: - - @add_method(SQLite) - def _date( - self, - path: str, - ) -> str: - with self._db as db: - query = f""" - SELECT date - FROM data - WHERE path="{path}" - """ - date = db.execute(query).fetchone()[0] - return date - - interface.date("/file.txt", "1.0.0") - -.. jupyter-execute:: - - @add_method(SQLite) - def _owner( - self, - path: str, - ) -> str: - with self._db as db: - query = f""" - SELECT owner - FROM data - WHERE path="{path}" - """ - owner = db.execute(query).fetchone()[0] - return owner - - interface.owner("/file.txt", "1.0.0") - -Implementing a copy function is optional. -But the default implementation -will temporarily download the file -and then upload it again. -Hence, -we provide a more efficient implementation. - -.. jupyter-execute:: - - @add_method(SQLite) - def _copy_file( + def get_file( self, src_path: str, dst_path: str, - verbose: bool, - ): - with self._db as db: - query = f""" - SELECT * - FROM data - WHERE path="{src_path}" - """ - (_, checksum, content, _, owner) = db.execute(query).fetchone() - date = datetime.datetime.today().strftime("%Y-%m-%d") - query = """ - INSERT INTO data (path, checksum, content, date, owner) - VALUES (?, ?, ?, ?, ?) - """ - data = (dst_path, checksum, content, date, owner) - db.execute(query, data) - - interface.copy_file("/file.txt", "/copy/file.txt", version="1.0.0") - interface.exists("/copy/file.txt", "1.0.0") - -Implementing a move function is also optional, -but it is more efficient if we provide one. - -.. 
jupyter-execute:: - - @add_method(SQLite) - def _move_file( + *, + user: str, + password: str, + validate: bool = False, + verbose: bool = False, + ): + src_path = self.path(src_path, user=user, password=password) + return self._get_file(src_path, dst_path, validate, verbose) + + def ls( self, - src_path: str, - dst_path: str, - verbose: bool, - ): - with self._db as db: - query = f""" - UPDATE data - SET path="{dst_path}" - WHERE path="{src_path}" - """ - db.execute(query) - - interface.move_file("/copy/file.txt", "/move/file.txt", version="1.0.0") - interface.exists("/move/file.txt", "1.0.0") - -We implement a method -to fetch a file -from the backend. - -.. jupyter-execute:: - - @add_method(SQLite) - def _get_file( + path: str = "/", + *, + user: str, + password: str, + pattern: str = None, + suppress_backend_errors: bool = False, + ): + path = self.path(path, allow_sub_path=True, user=user, password=password) + return self._ls(path, suppress_backend_errors, pattern) + # paths = [path.replace(self.sep + user, "") for path in paths] + # return paths + + def move_file( self, src_path: str, dst_path: str, - verbose: bool, - ): - with self._db as db: - query = f""" - SELECT content - FROM data - WHERE path="{src_path}" - """ - content = db.execute(query).fetchone()[0] - with open(dst_path, "wb") as fp: - fp.write(content) - -Which we then use to download the file. - -.. jupyter-execute:: - - file = interface.get_file("/file.txt", "local.txt", "1.0.0") - -To inspect the files -on our backend -we provide a listing method. - -.. jupyter-execute:: - - import typing - - @add_method(SQLite) - def _ls( - self, - path: str, - ) -> typing.List[str]: - - with self._db as db: - - # list all files and versions under sub-path - query = f""" - SELECT path - FROM data - WHERE path - LIKE ? || "%" - """ - ls = db.execute(query, [path]).fetchall() - ls = [x[0] for x in ls] - - return ls - -Let's test it. - -.. jupyter-execute:: - - interface.ls("/") - -.. 
jupyter-execute:: - - interface.ls("/file.txt") - -To delete a file -from our backend -requires another method. - -.. jupyter-execute:: + *, + user: str, + password: str, + validate: bool = False, + verbose: bool = False, + ): + src_path = self.path(src_path, user=user, password=password) + dst_path = self.path(dst_path, user=user, password=password) + self._move_file(src_path, dst_path, validate, verbose) - @add_method(SQLite) - def _remove_file( + def path( self, path: str, - ): - with self._db as db: - query = f""" - DELETE - FROM data - WHERE path="{path}" - """ - db.execute(query) - - interface.remove_file("/file.txt", "1.0.0") - interface.ls("/") - -We add a method to close the connection -to a database and call it. - -.. jupyter-execute:: - - @add_method(SQLite) - def _close( - self, - ): - self._db.close() - - backend.close() - -Finally, -we add a method that -deletes the database -and removes the repository -(or raises an error -if the database does not exist). - -.. jupyter-execute:: + *, + user: str, + password: str, + allow_sub_path: bool = False, + ): + with UserDB(self.fs) as db: + if user not in db or db[user] != password: + raise ValueError("User does not exist or wrong password.") + path = self._path(path, allow_sub_path) + return self.join(self.sep, user, path) - @add_method(SQLite) - def _delete( + def put_file( self, - ): - if not os.path.exists(self._path): - raise FileNotFoundError( - errno.ENOENT, - os.strerror(errno.ENOENT), - self._path, - ) - os.remove(self._path) - os.rmdir(os.path.dirname(self._path)) - - SQLite.delete("./host", "repo") + src_path: str, + dst_path: str, + *, + user: str, + password: str, + validate: bool = False, + verbose: bool = False, + ): + dst_path = self.path(dst_path, user=user, password=password) + return self._put_file(src_path, dst_path, validate, verbose) -And that's it, -we have a fully functional backend. 
+ def remove_file(self, path: str, *, user: str, password: str): + path = self.path(path, user=user, password=password) + self._remove_file(path) -Voilà! -.. reset working directory and clean up -.. jupyter-execute:: - :hide-code: +Let's create a dir file system +with a repository folder +with our custom backend, +and upload a file: - import shutil - os.chdir(_cwd_root) - shutil.rmtree(_tmp_root) +>>> import audeer +>>> repo = audeer.mkdir("./repo") +>>> filesystem = fsspec.filesystem("dir", path=repo) +>>> backend = UserContent(filesystem) +>>> backend.add_user("test", "pa$$word") +>>> _ = audeer.touch("local.txt") +>>> backend.put_file("local.txt", "/file.txt", user="test", password="pa$$word") +>>> backend.ls("/", user="test", password="pa$$word") +['/test/file.txt'] +At the end we clean up and delete our repo. -.. _SQLite: https://sqlite.org/index.html +>>> audeer.rmdir(repo) diff --git a/docs/index.rst b/docs/index.rst index 049611a6..f8513f4b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -7,7 +7,6 @@ install usage developer-guide - legacy .. Warning: then usage of genindex is a hack to get a TOC entry, see .. https://stackoverflow.com/a/42310803. This might break the usage of sphinx if @@ -17,8 +16,6 @@ :hidden: api/audbackend - api/audbackend.backend - api/audbackend.interface genindex .. toctree:: diff --git a/docs/legacy.rst b/docs/legacy.rst deleted file mode 100644 index ad657fb2..00000000 --- a/docs/legacy.rst +++ /dev/null @@ -1,77 +0,0 @@ -.. set temporal working directory -.. jupyter-execute:: - :hide-code: - - import os - import audeer - - _cwd_root = os.getcwd() - _tmp_root = audeer.mkdir(os.path.join("docs", "tmp")) - os.chdir(_tmp_root) - - -.. _legacy-backends: - -Legacy backends -=============== - -The default file structure on the backend -has changed with version 1.0.0. - -Before, -a file ``/sub/file.txt`` -with version ``1.0.0`` -was stored under - -.. code-block:: - - /sub/file/1.0.0/file-1.0.0.txt - -Now it is stored under - -.. 
code-block:: - - /sub/1.0.0/file.txt - -To force the old file structure -use the :class:`audbackend.interface.Maven` interface. -We recommend this -for existing repositories -that store files -under the old structure. -If you have to store files -that contain a dot -in its file extension, -you have to list those extensions explicitly. - -.. jupyter-execute:: - - import audbackend - - audbackend.backend.FileSystem.create("./host", "repo") - backend = audbackend.backend.FileSystem("./host", "repo") - backend.open() - interface = audbackend.interface.Maven(backend, extensions=["tar.gz"]) - -Afterwards we upload an TAR.GZ archive -and check that it is stored as expected. - -.. jupyter-execute:: - - import audeer - import tempfile - - with tempfile.TemporaryDirectory() as tmp: - audeer.touch(audeer.path(tmp, "file.txt")) - interface.put_archive(tmp, "/file.tar.gz", "1.0.0") - - audeer.list_file_names("./host", recursive=True, basenames=True) - - -.. reset working directory and clean up -.. jupyter-execute:: - :hide-code: - - import shutil - os.chdir(_cwd_root) - shutil.rmtree(_tmp_root) diff --git a/docs/usage.rst b/docs/usage.rst index 1150c9cc..45df61ab 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -1,33 +1,18 @@ -.. set temporal working directory -.. jupyter-execute:: - :hide-code: - - import os - import audeer - - _cwd_root = os.getcwd() - _tmp_root = audeer.mkdir("docs", "tmp-usage") - os.chdir(_tmp_root) - - .. _usage: Usage ===== With the help of :mod:`audbackend` -a user can store files -in a repository -on a storage system -(backend). - -File access is handled -via an :ref:`interface `, -which defines how the data is structured +a user can access files +on different backends, +which define how the data is structured and presented to the user. -In addition, -:mod:`audbackend` supports different storage systems, -so called :ref:`backends `. 
+ +The files can be stored on +any file system +following the :mod:`fsspec` specifications, +including Microsoft Azure, and Amazon S3. .. _unversioned-data-on-a-file-system: @@ -35,106 +20,65 @@ so called :ref:`backends `. Unversioned data on a file system --------------------------------- -To store data on a backend -we need to create a repository first. -We select the :class:`audbackend.backend.FileSystem` backend. - -.. jupyter-execute:: - :hide-output: - - import audbackend +To access data on a backend +we need a file system +that stores the data. +We select the :class:`fsspec.DirFileSystem`, +to store the data inside a local folder. - audbackend.backend.FileSystem.create("./host", "repo") +>>> import fsspec +>>> import audeer +>>> path = audeer.mkdir("./repo") +>>> filesystem = fsspec.filesystem("dir", path=path) -Once we have an existing repository, -we can access it by instantiating the backend class. -For some backends we have to establish a connection first. -This can be achieved using a ``with`` statement, -or by calling ``backend.open()`` at the beginning, -and ``backend.close()`` at the end. -If you are unsure -whether your backend requires this step, -just do it always. - -.. jupyter-execute:: - - backend = audbackend.backend.FileSystem("./host", "repo") - backend.open() - -After establishing a connection -we could directly execute read and write operations -on the backend object. -However, -we recommend to always use -:mod:`interfaces ` -to communicate with a backend. -Here, we use :class:`audbackend.interface.Unversioned`. +Now we can wrap around a backend, +which manages how the data is stored. +Here, we use :class:`audbackend.Unversioned`. It does not support versioning, i.e. exactly one file exists for a backend path. -.. jupyter-execute:: +>>> import audbackend +>>> backend = audbackend.Unversioned(filesystem) - interface = audbackend.interface.Unversioned(backend) - -Now we can upload our first file to the repository. 
+Now we can upload our first file to the backend. Note, it is important to provide an absolute path -from the root of the repository +from the root of the backend by starting it with ``/``. -.. jupyter-execute:: - - import audeer - - file = audeer.touch("file.txt") - interface.put_file(file, "/file.txt") +>>> file = audeer.touch("file.txt") +>>> backend.put_file(file, "/file.txt") -We check if the file exists in the repository. +We check if the file exists on the backend. -.. jupyter-execute:: - - interface.exists("/file.txt") +>>> backend.exists("/file.txt") +True And access its meta information, like its checksum. -.. jupyter-execute:: - - interface.checksum("/file.txt") - -Its creation date. - -.. jupyter-execute:: - - interface.date("/file.txt") - -Or the owner who uploaded the file. - -.. jupyter-execute:: - - interface.owner("/file.txt") +>>> backend.checksum("/file.txt") +'d41d8cd98f00b204e9800998ecf8427e' We create a copy of the file and verify it exists. -.. jupyter-execute:: - - interface.copy_file("/file.txt", "/copy/file.txt") - interface.exists("/copy/file.txt") +>>> backend.copy_file("/file.txt", "/copy/file.txt") +>>> backend.exists("/copy/file.txt") +True We move it to a new location. -.. jupyter-execute:: - - interface.move_file("/copy/file.txt", "/move/file.txt") - interface.exists("/copy/file.txt"), interface.exists("/move/file.txt") +>>> backend.move_file("/copy/file.txt", "/move/file.txt") +>>> backend.exists("/copy/file.txt") +False +>>> backend.exists("/move/file.txt") +True We download the file and store it as ``local.txt``. -.. jupyter-execute:: - - file = interface.get_file("/file.txt", "local.txt") +>>> file = backend.get_file("/file.txt", "local.txt") It is possible to upload one or more files @@ -146,29 +90,24 @@ and store them as ``folder.zip`` under the sub-path ``/archives/`` in the repository. -.. 
jupyter-execute:: - - folder = audeer.mkdir("./folder") - audeer.touch(folder, "file1.txt") - audeer.touch(folder, "file2.txt") - interface.put_archive(folder, "/archives/folder.zip") +>>> folder = audeer.mkdir("./folder") +>>> _ = audeer.touch(folder, "file1.txt") +>>> _ = audeer.touch(folder, "file2.txt") +>>> backend.put_archive(folder, "/archives/folder.zip") When we download an archive it is automatically extracted, -when using :meth:`audbackend.interface.Unversioned.get_archive` -instead of :meth:`audbackend.interface.Unversioned.get_file`. +when using :meth:`audbackend.Unversioned.get_archive` +instead of :meth:`audbackend.Unversioned.get_file`. -.. jupyter-execute:: - - paths = interface.get_archive("/archives/folder.zip", "downloaded_folder") - paths +>>> backend.get_archive("/archives/folder.zip", "downloaded_folder") +['file1.txt', 'file2.txt'] We can list all files in the repository. -.. jupyter-execute:: - - interface.ls("/") +>>> backend.ls("/") +['/archives/folder.zip', '/file.txt', '/move/file.txt'] If we provide a sub-path @@ -177,45 +116,20 @@ a list with files that start with the sub-path is returned. -.. jupyter-execute:: - - interface.ls("/archives/") +>>> backend.ls("/archives/") +['/archives/folder.zip'] We can remove files. -.. jupyter-execute:: - - interface.remove_file("/file.txt") - interface.remove_file("/archives/folder.zip") - interface.ls("/") - -Finally, -we close the connection to the backend. - -.. jupyter-execute:: +>>> backend.remove_file("/file.txt") +>>> backend.remove_file("/archives/folder.zip") +>>> backend.ls("/") +['/move/file.txt'] - backend.close() +In the end we clean up, +by deleting the repository folder. -And delete the whole repository -with all its content. - -.. jupyter-execute:: - - audbackend.backend.FileSystem.delete("host", "repo") - -Now, -if we try to open the repository again, -we will get an error -(note that this behavior is not guaranteed -for all backend classes -as it depends on the implementation). 
- -.. jupyter-execute:: - - try: - backend.open() - except audbackend.BackendError as ex: - display(str(ex.exception)) +>>> audeer.rmdir(path) .. _versioned-data-on-a-file-system: @@ -223,98 +137,75 @@ as it depends on the implementation). Versioned data on a file system ------------------------------- -We start by creating a repository -on the :class:`audbackend.backend.FileSystem` backend. -This time we access it -with the :class:`audbackend.interface.Versioned` interface -(which is also used by default). +We start by creating a repository folder +and a :class:`fsspec.DirFileSystem` file system. + +>>> repo = audeer.mkdir("./repo") +>>> filesystem = fsspec.filesystem("dir", path=repo) -.. jupyter-execute:: +This time we manage the files +with the :class:`audbackend.Versioned` backend. - audbackend.backend.FileSystem.create("./host", "repo") - backend = audbackend.backend.FileSystem("./host", "repo") - backend.open() - interface = audbackend.interface.Versioned(backend) +>>> backend = audbackend.Versioned(filesystem) We then upload a file and assign version ``"1.0.0"`` to it. -.. jupyter-execute:: - - with open("file.txt", "w") as file: - file.write("Content v1.0.0") - interface.put_file("file.txt", "/file.txt", "1.0.0") +>>> with open("file.txt", "w") as file: +... _ = file.write("Content v1.0.0") +>>> backend.put_file("file.txt", "/file.txt", "1.0.0") Now we change the file for version ``"2.0.0"``. -.. jupyter-execute:: - - with open("file.txt", "w") as file: - file.write("Content v2.0.0") - interface.put_file("file.txt", "/file.txt", "2.0.0") +>>> with open("file.txt", "w") as file: +... _ = file.write("Content v2.0.0") +>>> backend.put_file("file.txt", "/file.txt", "2.0.0") If we inspect the content of the repository it will return a list of tuples containing file name and version. -.. jupyter-execute:: - - interface.ls("/") +>>> backend.ls("/") +[('/file.txt', '1.0.0'), ('/file.txt', '2.0.0')] We can also inspect the available versions for a file. -.. 
jupyter-execute:: - - interface.versions("/file.txt") +>>> backend.versions("/file.txt") +['1.0.0', '2.0.0'] Or request it's latest version. -.. jupyter-execute:: - - interface.latest_version("/file.txt") +>>> backend.latest_version("/file.txt") +'2.0.0' We can copy a specific version of a file. -.. jupyter-execute:: - - interface.copy_file("/file.txt", "/copy/file.txt", version="1.0.0") - interface.ls("/copy/") +>>> backend.copy_file("/file.txt", "/copy/file.txt", version="1.0.0") +>>> backend.ls("/copy/") +[('/copy/file.txt', '1.0.0')] Or all versions. -.. jupyter-execute:: - - interface.copy_file("/file.txt", "/copy/file.txt") - interface.ls("/copy/") +>>> backend.copy_file("/file.txt", "/copy/file.txt") +>>> backend.ls("/copy/") +[('/copy/file.txt', '1.0.0'), ('/copy/file.txt', '2.0.0')] We move them to a new location. -.. jupyter-execute:: - - interface.move_file("/copy/file.txt", "/move/file.txt") - interface.ls("/move/") +>>> backend.move_file("/copy/file.txt", "/move/file.txt") +>>> backend.ls("/move/") +[('/move/file.txt', '1.0.0'), ('/move/file.txt', '2.0.0')] When downloading a file, we can select the desired version. -.. jupyter-execute:: - - path = interface.get_file("/file.txt", "local.txt", "1.0.0") - with open(path, "r") as file: - display(file.read()) +>>> path = backend.get_file("/file.txt", "local.txt", "1.0.0") +>>> with open(path, "r") as file: +... file.read() +'Content v1.0.0' When we are done, -we close the connection to the repository. - -.. jupyter-execute:: - - backend.close() - -.. reset working directory and clean up -.. jupyter-execute:: - :hide-code: +we delete the repository. 
- import shutil - os.chdir(_cwd_root) - shutil.rmtree(_tmp_root) +>>> audeer.rmdir(repo) diff --git a/pyproject.toml b/pyproject.toml index 78643310..ee63e301 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ classifiers = [ ] dependencies = [ 'audeer >=1.20.0', + 'fsspec', 'pywin32; sys_platform == "win32"', ] # Get version dynamically from git @@ -39,8 +40,12 @@ dynamic = ['version'] artifactory = [ 'dohq-artifactory >=0.10.0', ] +minio = [ + 'minio', +] all = [ 'dohq-artifactory >=0.10.0', + 'minio', ] @@ -72,7 +77,6 @@ skip = './audbackend.egg-info,./build,.docs/api,./docs/_templates' cache_dir = '.cache/pytest' xfail_strict = true addopts = ''' - --doctest-plus --cov=audbackend --cov-fail-under=100 --cov-report term-missing @@ -112,6 +116,9 @@ extend-ignore = [ 'F401', # * imported but unused 'I001', # Import block is un-sorted or un-formatted ] +'audbackend/core/base.py' = [ + 'D417', # Missing argument descriptions in the docstring for `remove_file`: `**kwargs`, `*args` +] 'audbackend/core/conftest.py' = [ 'D101', # Missing docstring in public class 'D105', # Missing docstring in magic method diff --git a/tests/bad_file_system.py b/tests/bad_file_system.py deleted file mode 100644 index 7e6a3e5a..00000000 --- a/tests/bad_file_system.py +++ /dev/null @@ -1,43 +0,0 @@ -import audeer - -import audbackend - - -class BadFileSystem(audbackend.backend.FileSystem): - r"""Imitates a corrupted file system.""" - - # Overwrite `put_file()` to avoid calling it `exists()` - def put_file( - self, - src_path: str, - dst_path: str, - *, - validate: bool = False, - verbose: bool = False, - ): - r"""Put file on backend.""" - checksum = audeer.md5(src_path) - audbackend.core.utils.call_function_on_backend( - self._put_file, - src_path, - dst_path, - checksum, - verbose, - ) - - def _get_file( - self, - src_path: str, - dst_path: str, - verbose: bool, - ): - super()._get_file(src_path, dst_path, verbose) - # raise error after file was retrieved - raise 
InterruptedError() - - def _exists( - self, - path: str, - ): - # raise error when checking if file exists - raise InterruptedError() diff --git a/tests/conftest.py b/tests/conftest.py index 3ea04592..26511e59 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,14 +1,12 @@ -import getpass import os +import typing +import fsspec +import minio import pytest import audeer -import audbackend - -from singlefolder import SingleFolder - # UID for test session # Repositories on the host will be named @@ -16,100 +14,76 @@ pytest.UID = audeer.uid()[:8] -@pytest.fixture(scope="package", autouse=True) -def register_single_folder(): - warning = ( - "register is deprecated and will be removed with version 2.2.0. " - "Use backend classes directly instead." - ) - with pytest.warns(UserWarning, match=warning): - audbackend.register("single-folder", SingleFolder) - - -@pytest.fixture(scope="package", autouse=False) -def hosts(tmpdir_factory): - return { - # For tests based on backend names (deprecated), - # like audbackend.access() - "artifactory": "https://audeering.jfrog.io/artifactory", - "file-system": str(tmpdir_factory.mktemp("host")), - "single-folder": str(tmpdir_factory.mktemp("host")), - } - - -@pytest.fixture(scope="function", autouse=False) -def owner(request): - r"""Return expected owner value.""" - backend_cls = request.param - if ( - hasattr(audbackend.backend, "Artifactory") - and backend_cls == audbackend.backend.Artifactory - ): - owner = backend_cls.get_authentication("audeering.jfrog.io/artifactory")[0] - else: +def create_file_tree(root: str, files: typing.Sequence): + r"""Create file tree. + + Args: + root: folder under which files should be created + files: relative file paths + to create inside ``root``, + e.g. 
``/sub-path/file.txt`` + + """ + for path in files: if os.name == "nt": - owner = "Administrators" + path = path.replace("/", os.path.sep) + if path.endswith(os.path.sep): + path = audeer.mkdir(root, path) else: - owner = getpass.getuser() + path = audeer.path(root, path) + audeer.mkdir(os.path.dirname(path)) + audeer.touch(path) + + +@pytest.fixture(scope="function") +def dir_filesystem(tmpdir): + root = audeer.mkdir(tmpdir, f"unittest-{pytest.UID}-{audeer.uid()[:8]}") + # Wrap "local" filesystem in "dir" filesystem + # to return paths relative to root + yield fsspec.filesystem( + "dir", + path=root, + fs=fsspec.filesystem("local"), + ) + - yield owner +@pytest.fixture(scope="function") +def minio_filesystem(): + bucket = f"unittest-{pytest.UID}-{audeer.uid()[:8]}" + # Use MinIO playground, compare + # https://min.io/docs/minio/linux/developers/python/API.html + url = "play.minio.io:9000" + access = "Q3AM3UQ867SPQQA43P2F" + secret = "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG" -@pytest.fixture(scope="function", autouse=False) -def interface(tmpdir_factory, request): - r"""Create a backend with interface. + # Create bucket + client = minio.Minio(url, access_key=access, secret_key=secret) + client.make_bucket(bucket) - This fixture should be called indirectly - providing a list of ``(backend, interface)`` tuples. - For example, to create a file-system backend - and access it with a versioned interface: + fs_s3 = fsspec.filesystem( + "s3", + endpoint_url=f"https://{url}", + key=access, + secret=secret, + ) - .. code-block:: python + yield fsspec.filesystem("dir", path=bucket, fs=fs_s3) - @pytest.mark.parametrize( - "interface", - [(audbackend.backend.FileSystem, audbackend.interface.Versioned)], - indirect=True, - ) + # Delete all objects in bucket + objects = client.list_objects(bucket, recursive=True) + for obj in objects: + client.remove_object(bucket, obj.object_name) - At the end of the test the backend is deleted. 
+ # Delete bucket + client.remove_bucket(bucket) - """ - backend_cls, interface_cls = request.param - if ( - hasattr(audbackend.backend, "Artifactory") - and backend_cls == audbackend.backend.Artifactory - ): - artifactory = True - host = "https://audeering.jfrog.io/artifactory" - else: - artifactory = False - host = str(tmpdir_factory.mktemp("host")) - repository = f"unittest-{pytest.UID}-{audeer.uid()[:8]}" - - backend_cls.create(host, repository) - with backend_cls(host, repository) as backend: - interface = interface_cls(backend) - - yield interface - - if artifactory: - import dohq_artifactory - - try: - backend._repo.delete() - except dohq_artifactory.exception.ArtifactoryException: - # It might happen from time to time, - # that a repository cannot be deleted. - # In those cases, - # we don't raise an error here, - # but rely on the user calling the clean up script - # from time to time: - # $ python tests/misc/cleanup_artifactory.py - pass - - if not artifactory: - backend_cls.delete(host, repository) + +protocol = os.getenv("AUDBACKEND_TEST_FS", "dir") +if protocol == "dir": + filesystem = dir_filesystem +elif protocol == "minio": + filesystem = minio_filesystem @pytest.fixture(scope="package", autouse=True) diff --git a/tests/requirements.txt b/tests/requirements.txt index 40b25a97..3fa7cc63 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,3 +1,4 @@ +minio pytest<8.1.0 # required by doctestplus pytest-cov -pytest-doctestplus +sybil diff --git a/tests/singlefolder.py b/tests/singlefolder.py deleted file mode 100644 index 5f397c30..00000000 --- a/tests/singlefolder.py +++ /dev/null @@ -1,166 +0,0 @@ -import datetime -import os -import pickle -import shutil -import threading -import typing - -import audeer - -import audbackend - - -class SingleFolder(audbackend.backend.Base): - r"""Backend implemented in a single folder. - - Files put on the backend - are stored under a random file name. 
- A serialized dictionary - stores the dependency between - backend path and the names. - It also stores checksum for every file. - - """ - - class Map: - r"""Provides exclusive access to the map file.""" - - def __init__( - self, - path: str, - lock: threading.Lock, - ): - self.path = path - self.obj = {} - self.lock = lock - - def __enter__(self): - self.lock.acquire() - if os.path.exists(self.path): - with open(self.path, "rb") as fp: - self.obj = pickle.load(fp) - return self.obj - - def __exit__( - self, - type, - value, - traceback, - ): - with open(self.path, "wb") as fp: - pickle.dump(self.obj, fp) - self.lock.release() - - def __init__( - self, - host: str, - repository: str, - ): - super().__init__(host, repository) - - self._root = audeer.mkdir(audeer.path(host, repository)) - self._path = audeer.path(self._root, ".map") - self._lock = threading.Lock() - - def _open( - self, - ): - if not os.path.exists(self._path): - raise audbackend.core.utils.raise_file_not_found_error(self._path) - - with self.Map(self._path, self._lock): - pass - - def _checksum( - self, - path: str, - ) -> str: - with self.Map(self._path, self._lock) as m: - return m[path][1] - - def _create( - self, - ): - if os.path.exists(self._path): - raise audbackend.core.utils.raise_file_exists_error(self._path) - with self.Map(self._path, self._lock): - pass - - def _date( - self, - path: str, - ) -> str: - with self.Map(self._path, self._lock) as m: - p = m[path][0] - date = os.path.getmtime(p) - date = datetime.datetime.fromtimestamp(date) - date = audbackend.core.utils.date_format(date) - return date - - def _delete( - self, - ): - if not os.path.exists(self._path): - raise audbackend.core.utils.raise_file_not_found_error(self._path) - with self._lock: - audeer.rmdir(self._root) - - def _exists( - self, - path: str, - ) -> bool: - with self.Map(self._path, self._lock) as m: - return path in m - - def _get_file( - self, - src_path: str, - dst_path: str, - verbose: bool, - ): - with 
self.Map(self._path, self._lock) as m: - shutil.copy(m[src_path][0], dst_path) - - def _ls( - self, - path: str, - ) -> typing.List[str]: - with self.Map(self._path, self._lock) as m: - ls = [] - - for p in m: - if p.startswith(path): - ls.append(p) - - return ls - - def _owner( - self, - path: str, - ): - with self.Map(self._path, self._lock) as m: - p = m[path][0] - return audbackend.core.utils.file_owner(p) - - def _put_file( - self, - src_path: str, - dst_path: str, - checksum: str, - verbose: bool, - ): - with self.Map(self._path, self._lock) as m: - if dst_path not in m or checksum != m[dst_path][1]: - m[dst_path] = {} - p = audeer.path(self._root, audeer.uid()[:8]) - m[dst_path] = (p, checksum) - - shutil.copy(src_path, m[dst_path][0]) - - def _remove_file( - self, - path: str, - ): - with self.Map(self._path, self._lock) as m: - os.remove(m[path][0]) - m.pop(path) diff --git a/tests/test_api.py b/tests/test_api.py deleted file mode 100644 index 1efb1d81..00000000 --- a/tests/test_api.py +++ /dev/null @@ -1,96 +0,0 @@ -import pytest - -import audeer - -import audbackend - - -@pytest.mark.parametrize( - "name, host, repository, cls", - [ - ( - "file-system", - "file-system", - f"unittest-{audeer.uid()[:8]}", - audbackend.backend.FileSystem, - ), - ( - "artifactory", - "artifactory", - f"unittest-{audeer.uid()[:8]}", - audbackend.backend.Artifactory, - ), - pytest.param( # backend does not exist - "bad-backend", - None, - None, - None, - marks=pytest.mark.xfail(raises=ValueError), - ), - pytest.param( # host does not exist - "artifactory", - "bad-host", - "repo", - None, - marks=pytest.mark.xfail(raises=audbackend.BackendError), - ), - pytest.param( # invalid repository name - "artifactory", - "artifactory", - "bad/repo", - None, - marks=pytest.mark.xfail(raises=audbackend.BackendError), - ), - ], -) -def test_api(hosts, name, host, repository, cls): - if host is not None and host in hosts: - host = hosts[name] - - access_warning = ( - "access is deprecated 
and will be removed with version 2.2.0. " - r"Use Backend.__init__\(\) of corresponding backend instead." - ) - create_warning = ( - "create is deprecated and will be removed with version 2.2.0. " - r"Use class method Backend.create\(\) of corresponding backend instead." - ) - delete_warning = ( - "delete is deprecated and will be removed with version 2.2.0. " - r"Use class method Backend.delete\(\) of corresponding backend instead." - ) - - error_msg = "A backend class with name 'bad' does not exist." - with pytest.raises(ValueError, match=error_msg): - with pytest.warns(UserWarning, match=access_warning): - audbackend.access("bad", host, repository) - - error_msg = ( - "An exception was raised by the backend, " - "please see stack trace for further information." - ) - - with pytest.raises(audbackend.BackendError, match=error_msg): - with pytest.warns(UserWarning, match=access_warning): - audbackend.access(name, host, repository) - - # returns versioned interface for legacy reasons - with pytest.warns(UserWarning, match=create_warning): - interface = audbackend.create(name, host, repository) - assert isinstance(interface, audbackend.interface.Versioned) - assert isinstance(interface.backend, cls) - - with pytest.raises(audbackend.BackendError, match=error_msg): - with pytest.warns(UserWarning, match=create_warning): - audbackend.create(name, host, repository) - - with pytest.warns(UserWarning, match=access_warning): - interface = audbackend.access(name, host, repository) - assert isinstance(interface.backend, cls) - - with pytest.warns(UserWarning, match=delete_warning): - audbackend.delete(name, host, repository) - - with pytest.raises(audbackend.BackendError, match=error_msg): - with pytest.warns(UserWarning, match=access_warning): - audbackend.access(name, host, repository) diff --git a/tests/test_backend_artifactory.py b/tests/test_backend_artifactory.py deleted file mode 100644 index 42108fa3..00000000 --- a/tests/test_backend_artifactory.py +++ /dev/null @@ 
-1,215 +0,0 @@ -import os - -import pytest - -import audeer - -import audbackend - - -@pytest.fixture(scope="function", autouse=False) -def hide_credentials(): - defaults = {} - - for key in [ - "ARTIFACTORY_USERNAME", - "ARTIFACTORY_API_KEY", - "ARTIFACTORY_CONFIG_FILE", - ]: - defaults[key] = os.environ.get(key, None) - - for key, value in defaults.items(): - if value is not None: - del os.environ[key] - - yield - - for key, value in defaults.items(): - if value is not None: - os.environ[key] = value - elif key in os.environ: - del os.environ[key] - - -def test_authentication(tmpdir, hosts, hide_credentials): - host = hosts["artifactory"] - config_path = audeer.path(tmpdir, "config.cfg") - os.environ["ARTIFACTORY_CONFIG_FILE"] = config_path - - # config file does not exist - - backend = audbackend.backend.Artifactory(host, "repository") - assert backend.authentication == ("anonymous", "") - - # config file is empty - - audeer.touch(config_path) - backend = audbackend.backend.Artifactory(host, "repository") - assert backend.authentication == ("anonymous", "") - - # config file entry without username and password - - with open(config_path, "w") as fp: - fp.write(f"[{host}]\n") - - backend = audbackend.backend.Artifactory(host, "repository") - assert backend.authentication == ("anonymous", "") - - # config file entry with username and password - - username = "bad" - api_key = "bad" - with open(config_path, "w") as fp: - fp.write(f"[{host}]\n") - fp.write(f"username = {username}\n") - fp.write(f"password = {api_key}\n") - - backend = audbackend.backend.Artifactory(host, "repository") - assert backend.authentication == ("bad", "bad") - with pytest.raises(audbackend.BackendError): - backend.open() - - -@pytest.mark.parametrize("host", ["https://audeering.jfrog.io/artifactory"]) -@pytest.mark.parametrize("repository", [f"unittest-{pytest.UID}-{audeer.uid()[:8]}"]) -def test_create_delete_repositories(host, repository): - audbackend.backend.Artifactory.create(host, 
repository) - audbackend.backend.Artifactory.delete(host, repository) - - -@pytest.mark.parametrize("host", ["https://audeering.jfrog.io/artifactory"]) -@pytest.mark.parametrize("repository", [f"unittest-{pytest.UID}-{audeer.uid()[:8]}"]) -@pytest.mark.parametrize("authentication", [("non-existing", "non-existing")]) -def test_errors(host, repository, authentication): - backend = audbackend.backend.Artifactory( - host, repository, authentication=authentication - ) - with pytest.raises(audbackend.BackendError): - backend.open() - - -@pytest.mark.parametrize( - "interface", - [(audbackend.backend.Artifactory, audbackend.interface.Maven)], - indirect=True, -) -@pytest.mark.parametrize( - "file, version, extensions, regex, expected", - [ - ( - "/file.tar.gz", - "1.0.0", - [], - False, - "/file.tar/1.0.0/file.tar-1.0.0.gz", - ), - ( - "/file.tar.gz", - "1.0.0", - ["tar.gz"], - False, - "/file/1.0.0/file-1.0.0.tar.gz", - ), - ( - "/.tar.gz", - "1.0.0", - ["tar.gz"], - False, - "/.tar/1.0.0/.tar-1.0.0.gz", - ), - ( - "/tar.gz", - "1.0.0", - ["tar.gz"], - False, - "/tar/1.0.0/tar-1.0.0.gz", - ), - ( - "/.tar.gz", - "1.0.0", - [], - False, - "/.tar/1.0.0/.tar-1.0.0.gz", - ), - ( - "/.tar", - "1.0.0", - [], - False, - "/.tar/1.0.0/.tar-1.0.0", - ), - ( - "/tar", - "1.0.0", - [], - False, - "/tar/1.0.0/tar-1.0.0", - ), - # test regex - ( - "/file.0.tar.gz", - "1.0.0", - [r"\d+.tar.gz"], - False, - "/file.0.tar/1.0.0/file.0.tar-1.0.0.gz", - ), - ( - "/file.0.tar.gz", - "1.0.0", - [r"\d+.tar.gz"], - True, - "/file/1.0.0/file-1.0.0.0.tar.gz", - ), - ( - "/file.99.tar.gz", - "1.0.0", - [r"\d+.tar.gz"], - True, - "/file/1.0.0/file-1.0.0.99.tar.gz", - ), - ( - "/file.prediction.99.tar.gz", - "1.0.0", - [r"prediction.\d+.tar.gz", r"truth.tar.gz"], - True, - "/file/1.0.0/file-1.0.0.prediction.99.tar.gz", - ), - ( - "/file.truth.tar.gz", - "1.0.0", - [r"prediction.\d+.tar.gz", r"truth.tar.gz"], - True, - "/file/1.0.0/file-1.0.0.truth.tar.gz", - ), - ( - "/file.99.tar.gz", - "1.0.0", - 
[r"(\d+.)?tar.gz"], - True, - "/file/1.0.0/file-1.0.0.99.tar.gz", - ), - ( - "/file.tar.gz", - "1.0.0", - [r"(\d+.)?tar.gz"], - True, - "/file/1.0.0/file-1.0.0.tar.gz", - ), - ], -) -def test_maven_file_structure( - tmpdir, interface, file, version, extensions, regex, expected -): - interface.extensions = extensions - interface.regex = regex - - src_path = audeer.touch(audeer.path(tmpdir, "tmp")) - interface.put_file(src_path, file, version) - - url = str(interface.backend.path(expected)) - url_expected = str( - interface.backend.path(interface._path_with_version(file, version)) - ) - assert url_expected == url - assert interface.ls(file) == [(file, version)] - assert interface.ls() == [(file, version)] diff --git a/tests/test_backend_base.py b/tests/test_backend_base.py deleted file mode 100644 index 71dd023f..00000000 --- a/tests/test_backend_base.py +++ /dev/null @@ -1,126 +0,0 @@ -import re - -import pytest - -import audbackend - - -@pytest.mark.parametrize( - "paths, expected", - [ - (["/"], "/"), - (["/", ""], "/"), - (["/file"], "/file"), - (["/file/"], "/file/"), - (["/root", "file"], "/root/file"), - (["/root", "file/"], "/root/file/"), - (["/", "root", None, "", "file", ""], "/root/file"), - (["/", "root", None, "", "file", "/"], "/root/file/"), - (["/", "root", None, "", "file", "/", ""], "/root/file/"), - pytest.param( - [""], - None, - marks=pytest.mark.xfail(raises=ValueError), - ), - pytest.param( - ["file"], - None, - marks=pytest.mark.xfail(raises=ValueError), - ), - pytest.param( - ["sub/file"], - None, - marks=pytest.mark.xfail(raises=ValueError), - ), - pytest.param( - ["", "/file"], - None, - marks=pytest.mark.xfail(raises=ValueError), - ), - ], -) -@pytest.mark.parametrize( - "backend", - [ - audbackend.backend.Base("host", "repository"), - ], -) -def test_join(paths, expected, backend): - assert backend.join(*paths) == expected - - -@pytest.mark.parametrize( - "path, expected", - [ - ("/", ("/", "")), - ("/file", ("/", "file")), - ("/root/", 
("/root/", "")), - ("/root/file", ("/root/", "file")), - ("/root/file/", ("/root/file/", "")), - ("//root///file", ("/root/", "file")), - pytest.param( - "", - None, - marks=pytest.mark.xfail(raises=ValueError), - ), - pytest.param( - "file", - None, - marks=pytest.mark.xfail(raises=ValueError), - ), - pytest.param( - "sub/file", - None, - marks=pytest.mark.xfail(raises=ValueError), - ), - ], -) -@pytest.mark.parametrize( - "backend", - [ - audbackend.backend.Base("host", "repository"), - ], -) -def test_split(path, expected, backend): - assert backend.split(path) == expected - - -@pytest.mark.parametrize( - "backend", - [ - audbackend.backend.Base("host", "repository"), - ], -) -def test_errors(tmpdir, backend): - # Check errors when backend is not opened - error_msg = re.escape( - "Call 'Backend.open()' to establish a connection to the repository first." - ) - path = "file.txt" - src_path = "src.txt" - dst_path = "dst.txt" - src_root = "." - with pytest.raises(RuntimeError, match=error_msg): - backend.checksum(path) - with pytest.raises(RuntimeError, match=error_msg): - backend.copy_file(src_path, dst_path) - with pytest.raises(RuntimeError, match=error_msg): - backend.date(path) - with pytest.raises(RuntimeError, match=error_msg): - backend.exists(path) - with pytest.raises(RuntimeError, match=error_msg): - backend.get_archive(src_path, dst_path) - with pytest.raises(RuntimeError, match=error_msg): - backend.get_file(src_path, dst_path) - with pytest.raises(RuntimeError, match=error_msg): - backend.ls(path) - with pytest.raises(RuntimeError, match=error_msg): - backend.move_file(src_path, dst_path) - with pytest.raises(RuntimeError, match=error_msg): - backend.owner(path) - with pytest.raises(RuntimeError, match=error_msg): - backend.put_archive(src_root, dst_path) - with pytest.raises(RuntimeError, match=error_msg): - backend.put_file(src_path, dst_path) - with pytest.raises(RuntimeError, match=error_msg): - backend.remove_file(path) diff --git 
a/tests/test_backend_filesystem.py b/tests/test_backend_filesystem.py deleted file mode 100644 index 1ed2a9ac..00000000 --- a/tests/test_backend_filesystem.py +++ /dev/null @@ -1,169 +0,0 @@ -import os - -import pytest - -import audeer - -import audbackend - -from bad_file_system import BadFileSystem - - -@pytest.mark.parametrize( - "interface", - [(BadFileSystem, audbackend.interface.Versioned)], - indirect=True, -) -def test_get_file_interrupt(tmpdir, interface): - src_path = audeer.path(tmpdir, "~tmp") - - # put local file on backend - with open(src_path, "w") as fp: - fp.write("remote") - checksum_remote = audeer.md5(src_path) - interface.put_file(src_path, "/file", "1.0.0") - - # change content of local file - with open(src_path, "w") as fp: - fp.write("local") - checksum_local = audeer.md5(src_path) - assert checksum_local != checksum_remote - - # Try to use malfanctioning exists() method - with pytest.raises(audbackend.BackendError): - interface.exists("/file", "1.0.0") - assert interface.exists("/file", "1.0.0", suppress_backend_errors=True) is False - - # try to read remote file, local file remains unchanged - with pytest.raises(audbackend.BackendError): - interface.get_file("/file", src_path, "1.0.0") - assert audeer.md5(src_path) == checksum_local - - -@pytest.mark.parametrize( - "interface", - [(audbackend.backend.FileSystem, audbackend.interface.Maven)], - indirect=True, -) -@pytest.mark.parametrize( - "file, version, extensions, regex, expected", - [ - ( - "/file.tar.gz", - "1.0.0", - [], - False, - "file.tar/1.0.0/file.tar-1.0.0.gz", - ), - ( - "/file.tar.gz", - "1.0.0", - ["tar.gz"], - False, - "file/1.0.0/file-1.0.0.tar.gz", - ), - ( - "/.tar.gz", - "1.0.0", - ["tar.gz"], - False, - ".tar/1.0.0/.tar-1.0.0.gz", - ), - ( - "/tar.gz", - "1.0.0", - ["tar.gz"], - False, - "tar/1.0.0/tar-1.0.0.gz", - ), - ( - "/.tar.gz", - "1.0.0", - [], - False, - ".tar/1.0.0/.tar-1.0.0.gz", - ), - ( - "/.tar", - "1.0.0", - [], - False, - ".tar/1.0.0/.tar-1.0.0", - ), - 
( - "/tar", - "1.0.0", - [], - False, - "tar/1.0.0/tar-1.0.0", - ), - # test regex - ( - "/file.0.tar.gz", - "1.0.0", - [r"\d+.tar.gz"], - False, - "file.0.tar/1.0.0/file.0.tar-1.0.0.gz", - ), - ( - "/file.0.tar.gz", - "1.0.0", - [r"\d+.tar.gz"], - True, - "file/1.0.0/file-1.0.0.0.tar.gz", - ), - ( - "/file.99.tar.gz", - "1.0.0", - [r"\d+.tar.gz"], - True, - "file/1.0.0/file-1.0.0.99.tar.gz", - ), - ( - "/file.prediction.99.tar.gz", - "1.0.0", - [r"prediction.\d+.tar.gz", r"truth.tar.gz"], - True, - "file/1.0.0/file-1.0.0.prediction.99.tar.gz", - ), - ( - "/file.truth.tar.gz", - "1.0.0", - [r"prediction.\d+.tar.gz", r"truth.tar.gz"], - True, - "file/1.0.0/file-1.0.0.truth.tar.gz", - ), - ( - "/file.99.tar.gz", - "1.0.0", - [r"(\d+.)?tar.gz"], - True, - "file/1.0.0/file-1.0.0.99.tar.gz", - ), - ( - "/file.tar.gz", - "1.0.0", - [r"(\d+.)?tar.gz"], - True, - "file/1.0.0/file-1.0.0.tar.gz", - ), - ], -) -def test_maven_file_structure( - tmpdir, interface, file, version, extensions, regex, expected -): - expected = expected.replace("/", os.path.sep) - - interface.extensions = extensions - interface.regex = regex - - src_path = audeer.touch(audeer.path(tmpdir, "tmp")) - interface.put_file(src_path, file, version) - - path = os.path.join(interface.backend._root, expected) - path_expected = interface.backend._expand( - interface._path_with_version(file, version), - ) - assert path_expected == path - assert interface.ls(file) == [(file, version)] - assert interface.ls() == [(file, version)] diff --git a/tests/test_backend_filesystem_only.py b/tests/test_backend_filesystem_only.py deleted file mode 100644 index 6f85b30c..00000000 --- a/tests/test_backend_filesystem_only.py +++ /dev/null @@ -1,8 +0,0 @@ -import pytest - -import audbackend - - -# Check optional backends are not available -with pytest.raises(AttributeError): - audbackend.backend.Artifactory("https://host.com", "repo") diff --git a/tests/test_base.py b/tests/test_base.py new file mode 100644 index 
00000000..65e24968 --- /dev/null +++ b/tests/test_base.py @@ -0,0 +1,57 @@ +import inspect + +import pytest + +import audbackend + + +@pytest.mark.parametrize( + "method", + [ + "checksum", + "copy_file", + "date", + "exists", + "get_file", + "ls", + "move_file", + "path", + "put_file", + "remove_file", + ], +) +def test_errors(tmpdir, filesystem, method): + r"""Test for errors in AbstractBackend class. + + All of the methods that needs to be implemented + in derived classes + should raise a ``NotImplementedError``. + + Args: + tmpdir: tmpdir fixture + filesystem: filesystem fixture + method: method of ``audbackend.AbstractBackend`` + + """ + backend = audbackend.AbstractBackend(filesystem) + + # Get method to execute + backend_method = getattr(backend, method) + # Get number of arguments of method + args = inspect.signature(backend_method).parameters + n_args = len( + [ + arg + for arg in args.values() + if ( + arg.kind != arg.VAR_POSITIONAL # skip *args + and arg.kind != arg.VAR_KEYWORD # skip **kwargs + and arg.default is arg.empty + ) + ] + ) + # Define dummy arguments + args = [f"path-{n}" for n in range(n_args)] + + with pytest.raises(NotImplementedError): + backend_method(*args) diff --git a/tests/test_broken_filesystem.py b/tests/test_broken_filesystem.py new file mode 100644 index 00000000..b870cb40 --- /dev/null +++ b/tests/test_broken_filesystem.py @@ -0,0 +1,46 @@ +import pytest + +import audeer + +import audbackend + + +def test_get_file_interrupt(tmpdir, filesystem): + backend = audbackend.Unversioned(filesystem) + src_path = audeer.path(tmpdir, "~tmp") + + # Put local file on backend + with open(src_path, "w") as fp: + fp.write("remote") + checksum_remote = audeer.md5(src_path) + backend.put_file(src_path, "/file") + + # change content of local file + with open(src_path, "w") as fp: + fp.write("local") + checksum_local = audeer.md5(src_path) + assert checksum_local != checksum_remote + + # Simulate malfunctioning filesystem + + def get_file(src_path, 
dst_path, *, callback=None): + filesystem.get_file(src_path, dst_path, callback=callback) + # raise error after file was retrieved + raise InterruptedError() + + def exists(path): + # raise error when checking if file exists + raise InterruptedError() + + filesystem.get_file = get_file + filesystem.exists = exists + + # Try to use malfanctioning exists() method + with pytest.raises(audbackend.BackendError): + backend.exists("/file") + assert backend.exists("/file", suppress_backend_errors=True) is False + + # try to read remote file, local file remains unchanged + with pytest.raises(audbackend.BackendError): + backend.get_file("/file", src_path) + assert audeer.md5(src_path) == checksum_local diff --git a/tests/test_legacy_import.py b/tests/test_legacy_import.py deleted file mode 100644 index 3a65e3da..00000000 --- a/tests/test_legacy_import.py +++ /dev/null @@ -1,7 +0,0 @@ -import audbackend - - -def test_legacy_import(hosts): - audbackend.Backend("host", "repo") - audbackend.Artifactory(hosts["artifactory"], "repo") - audbackend.FileSystem(hosts["file-system"], "repo") diff --git a/tests/test_interface_maven.py b/tests/test_maven.py similarity index 75% rename from tests/test_interface_maven.py rename to tests/test_maven.py index 0c2ad182..770dfc13 100644 --- a/tests/test_interface_maven.py +++ b/tests/test_maven.py @@ -8,26 +8,18 @@ import audbackend -from singlefolder import SingleFolder +def test_errors(tmpdir, filesystem): + backend = audbackend.Maven(filesystem) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Maven), - (SingleFolder, audbackend.interface.Maven), - ], - indirect=True, -) -def test_errors(tmpdir, interface): # Ensure we have one file and one archive published on the backend archive = "/archive.zip" local_file = "file.txt" local_path = audeer.touch(audeer.path(tmpdir, local_file)) remote_file = f"/{local_file}" version = "1.0.0" - interface.put_file(local_path, remote_file, version) - 
interface.put_archive(tmpdir, archive, version, files=[local_file]) + backend.put_file(local_path, remote_file, version) + backend.put_archive(tmpdir, archive, version, files=[local_file]) # Create local read-only file and folder file_read_only = audeer.touch(audeer.path(tmpdir, "read-only-file.txt")) @@ -49,38 +41,43 @@ def test_errors(tmpdir, interface): "An exception was raised by the backend, " "please see stack trace for further information." ) + empty_version = "" + error_empty_version = "Version must not be empty." + invalid_version = "1.0.?" + error_invalid_version = re.escape( + f"Invalid version '{invalid_version}', " f"does not match '[A-Za-z0-9._-]+'." + ) + + # --- exists --- + # invalid version + with pytest.raises(ValueError, match=error_empty_version): + backend.exists(remote_file, empty_version) + with pytest.raises(ValueError, match=error_invalid_version): + backend.exists(remote_file, invalid_version) # --- ls --- # `path` does not exist with pytest.raises(audbackend.BackendError, match=error_backend): - interface.ls("/missing/") - interface.ls("/missing/", suppress_backend_errors=True) + backend.ls("/missing/") + backend.ls("/missing/", suppress_backend_errors=True) with pytest.raises(audbackend.BackendError, match=error_backend): - interface.ls("/missing.txt") - interface.ls("/missing.txt", suppress_backend_errors=True) + backend.ls("/missing.txt") + backend.ls("/missing.txt", suppress_backend_errors=True) remote_file_with_wrong_ext = audeer.replace_file_extension( remote_file, "missing", ) with pytest.raises(audbackend.BackendError, match=error_backend): - interface.ls(remote_file_with_wrong_ext) - interface.ls(remote_file_with_wrong_ext, suppress_backend_errors=True) + backend.ls(remote_file_with_wrong_ext) + backend.ls(remote_file_with_wrong_ext, suppress_backend_errors=True) # joined path without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.ls(file_invalid_path) + backend.ls(file_invalid_path) # `path` 
contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.ls(file_invalid_char) + backend.ls(file_invalid_char) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Maven), - (SingleFolder, audbackend.interface.Maven), - ], - indirect=True, -) @pytest.mark.parametrize( "files", [ @@ -316,27 +313,51 @@ def test_errors(tmpdir, interface): ), ], ) -def test_ls(tmpdir, interface, files, path, latest, pattern, expected): - assert interface.ls() == [] - assert interface.ls("/") == [] +def test_ls(tmpdir, filesystem, files, path, latest, pattern, expected): + backend = audbackend.Maven(filesystem) + + assert backend.ls() == [] + assert backend.ls("/") == [] # create content tmp_file = audeer.touch(tmpdir, "~") for file_path, file_version in files: - interface.put_file(tmp_file, file_path, file_version) + backend.put_file(tmp_file, file_path, file_version) # test - assert interface.ls( + assert backend.ls( path, latest_version=latest, pattern=pattern, ) == sorted(expected) -def test_repr(): - interface = audbackend.interface.Maven( - audbackend.backend.FileSystem("host", "repo") - ) - assert interface.__repr__() == ( - "audbackend.interface.Maven(audbackend.backend.FileSystem('host', 'repo'))" - ) +@pytest.mark.parametrize( + "path, version, extensions, regex, expected", + [ + ("/file.tar.gz", "1.0.0", [], False, "/file.tar/1.0.0/file.tar-1.0.0.gz"), + ("/file.tar.gz", "1.0.0", [], True, "/file.tar/1.0.0/file.tar-1.0.0.gz"), + ("/file.tar.gz", "1.0.0", ["tar.gz"], False, "/file/1.0.0/file-1.0.0.tar.gz"), + ("/file.tar.gz", "1.0.0", ["tar.gz"], True, "/file/1.0.0/file-1.0.0.tar.gz"), + ("/file.tar.0", "1.0", [r"tar.\d+"], True, "/file/1.0/file-1.0.tar.0"), + ( + "/file.zip.0", + "1.0", + [r"tar.\d+"], + True, + "/file.zip/1.0/file.zip-1.0.0", + ), + ], +) +def test_path(tmpdir, filesystem, path, version, extensions, regex, expected): + backend = audbackend.Maven(filesystem, 
extensions=extensions, regex=regex) + assert backend.path(path, version) == expected + + +@pytest.mark.parametrize( + "expected", + ["audbackend.Maven(DirFileSystem)"], +) +def test_repr(filesystem, expected): + backend = audbackend.Maven(filesystem) + assert repr(backend) == expected diff --git a/tests/test_repository.py b/tests/test_repository.py deleted file mode 100644 index b394ff15..00000000 --- a/tests/test_repository.py +++ /dev/null @@ -1,16 +0,0 @@ -import pytest - -import audbackend - - -def test_repository(): - name = "name" - host = "host" - backend = "backend" - msg = "Repository is deprecated and will be removed with version 2.2.0." - with pytest.warns(UserWarning, match=msg): - repo = audbackend.Repository(name, host, backend) - assert repo.name == name - assert repo.host == host - assert repo.backend == backend - assert repo.__repr__() == f"Repository('{name}', '{host}', '{backend}')" diff --git a/tests/test_interface_unversioned.py b/tests/test_unversioned.py similarity index 52% rename from tests/test_interface_unversioned.py rename to tests/test_unversioned.py index 81a7c496..f1e7e06b 100644 --- a/tests/test_interface_unversioned.py +++ b/tests/test_unversioned.py @@ -1,41 +1,15 @@ import datetime import os import platform -import random import re import stat -import string import pytest import audeer import audbackend - -from singlefolder import SingleFolder - - -@pytest.fixture(scope="function", autouse=False) -def tree(tmpdir, request): - r"""Create file tree.""" - files = request.param - paths = [] - - for path in files: - if os.name == "nt": - path = path.replace("/", os.path.sep) - if path.endswith(os.path.sep): - path = audeer.path(tmpdir, path) - path = audeer.mkdir(path) - path = path + os.path.sep - paths.append(path) - else: - path = audeer.path(tmpdir, path) - audeer.mkdir(os.path.dirname(path)) - path = audeer.touch(path) - paths.append(path) - - yield paths +from tests.conftest import create_file_tree @pytest.mark.parametrize( @@ 
-84,74 +58,60 @@ def tree(tmpdir, request): ["dir/to/file.ext", "file.ext"], ), ], - indirect=["tree"], ) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], - indirect=True, -) -def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): +def test_archive(tmpdir, filesystem, tree, archive, files, tmp_root, expected): + r"""Test handling of archives. + + Args: + tmpdir: tmpdir fixture + filesystem: filesystem fixture + tree: file tree in the source folder + archive: name of archive on backend + files: files to include from ``tree`` in archive + tmp_root: temporary directory + to be used by ``put_archive()`` and ``get_archive()`` + expected: expected files in destination folder + after extracting the archive + + """ + backend = audbackend.Unversioned(filesystem) + + src_dir = audeer.mkdir(tmpdir, "src") + dst_dir = audeer.mkdir(tmpdir, "dst") + + create_file_tree(src_dir, tree) + if tmp_root is not None: tmp_root = audeer.path(tmpdir, tmp_root) if os.name == "nt": expected = [file.replace("/", os.sep) for file in expected] - # if a tmp_root is given but does not exist, + # If a tmp_root is given but does not exist, # put_archive() should fail if tmp_root is not None: - if os.path.exists(tmp_root): - os.removedirs(tmp_root) + audeer.rmdir(tmp_root) with pytest.raises(FileNotFoundError): - interface.put_archive( - tmpdir, - archive, - files=files, - tmp_root=tmp_root, - ) + backend.put_archive(src_dir, archive, files=files, tmp_root=tmp_root) audeer.mkdir(tmp_root) - interface.put_archive( - tmpdir, - archive, - files=files, - tmp_root=tmp_root, - ) - # operation will be skipped - interface.put_archive( - tmpdir, - archive, - files=files, - tmp_root=tmp_root, - ) - assert interface.exists(archive) + # Upload archive + backend.put_archive(src_dir, archive, 
files=files, tmp_root=tmp_root) + assert backend.exists(archive) + # Repeated upload + backend.put_archive(src_dir, archive, files=files, tmp_root=tmp_root) + assert backend.exists(archive) - # if a tmp_root is given but does not exist, + # If a tmp_root is given but does not exist, # get_archive() should fail if tmp_root is not None: - if os.path.exists(tmp_root): - os.removedirs(tmp_root) + audeer.rmdir(tmp_root) with pytest.raises(FileNotFoundError): - interface.get_archive( - archive, - tmpdir, - tmp_root=tmp_root, - ) + backend.get_archive(archive, dst_dir, tmp_root=tmp_root) audeer.mkdir(tmp_root) - assert ( - interface.get_archive( - archive, - tmpdir, - tmp_root=tmp_root, - ) - == expected - ) + files_in_archive = backend.get_archive(archive, dst_dir, tmp_root=tmp_root) + assert files_in_archive == expected @pytest.mark.parametrize( @@ -167,67 +127,49 @@ def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): ), ], ) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], - indirect=True, -) -def test_copy(tmpdir, src_path, dst_path, interface): - local_path = audeer.path(tmpdir, "~") +def test_copy(tmpdir, filesystem, src_path, dst_path): + backend = audbackend.Unversioned(filesystem) + + local_path = audeer.path(tmpdir, "file.ext") audeer.touch(local_path) - interface.put_file(local_path, src_path) + backend.put_file(local_path, src_path) # copy file if dst_path != src_path: - assert not interface.exists(dst_path) - interface.copy_file(src_path, dst_path) - assert interface.exists(src_path) - assert interface.exists(dst_path) + assert not backend.exists(dst_path) + backend.copy_file(src_path, dst_path) + assert backend.exists(src_path) + assert backend.exists(dst_path) # copy file again with different checksum with open(local_path, "w") as fp: 
fp.write("different checksum") - assert audeer.md5(local_path) != interface.checksum(src_path) - interface.put_file(local_path, src_path) - assert audeer.md5(local_path) == interface.checksum(src_path) + assert audeer.md5(local_path) != backend.checksum(src_path) + backend.put_file(local_path, src_path) + assert audeer.md5(local_path) == backend.checksum(src_path) if dst_path != src_path: - assert audeer.md5(local_path) != interface.checksum(dst_path) - interface.copy_file(src_path, dst_path) - assert audeer.md5(local_path) == interface.checksum(dst_path) + assert audeer.md5(local_path) != backend.checksum(dst_path) + backend.copy_file(src_path, dst_path) + assert audeer.md5(local_path) == backend.checksum(dst_path) - # clean up - interface.remove_file(src_path) - if dst_path != src_path: - interface.remove_file(dst_path) +def test_errors(tmpdir, filesystem): + backend = audbackend.Unversioned(filesystem) - -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], - indirect=True, -) -def test_errors(tmpdir, interface): # Ensure we have one file and one archive published on the backend archive = "/archive.zip" local_file = "file.txt" - local_path = audeer.touch(audeer.path(tmpdir, local_file)) + local_path = audeer.path(audeer.path(tmpdir, local_file)) + with open(local_path, "w") as fp: + fp.write("Text") local_folder = audeer.mkdir(audeer.path(tmpdir, "folder")) remote_file = f"/{local_file}" - interface.put_file(local_path, remote_file) - interface.put_archive(tmpdir, archive, files=[local_file]) + backend.put_file(local_path, remote_file) + backend.put_archive(tmpdir, archive, files=[local_file]) # Create local read-only file and folder file_read_only = audeer.touch(audeer.path(tmpdir, "read-only-file.txt")) @@ -235,6 +177,10 @@ def test_errors(tmpdir, interface): folder_read_only 
= audeer.mkdir(audeer.path(tmpdir, "read-only-folder")) os.chmod(folder_read_only, stat.S_IRUSR) + # Invalid archive + archive_invalid_type = "/archive.txt" + error_invalid_archive = "You can only create a ZIP or TAR.GZ archive, not " + # Invalid file names / versions and error messages file_invalid_path = "invalid/path.txt" error_invalid_path = re.escape( @@ -269,229 +215,225 @@ def test_errors(tmpdir, interface): # --- checksum --- # `path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.checksum("/missing.txt") + backend.checksum("/missing.txt") # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_char): - interface.checksum(file_invalid_char) + backend.checksum(file_invalid_char) # `path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.checksum(file_sub_path) + backend.checksum(file_sub_path) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.checksum(file_invalid_char) + backend.checksum(file_invalid_char) # --- copy_file --- # `src_path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.copy_file("/missing.txt", "/file.txt") + backend.copy_file("/missing.txt", "/file.txt") # `src_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.copy_file(file_invalid_path, "/file.txt") + backend.copy_file(file_invalid_path, "/file.txt") # `src_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.copy_file(file_sub_path, "/file.txt") + backend.copy_file(file_sub_path, "/file.txt") # `src_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.copy_file(file_invalid_char, "/file.txt") + backend.copy_file(file_invalid_char, "/file.txt") # `dst_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.copy_file("/file.txt", 
file_invalid_path) + backend.copy_file("/file.txt", file_invalid_path) # `dst_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.copy_file("/file.txt", file_sub_path) + backend.copy_file("/file.txt", file_sub_path) # `dst_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.copy_file("/file.txt", file_invalid_char) + backend.copy_file("/file.txt", file_invalid_char) # --- date --- # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.date(file_invalid_path) + backend.date(file_invalid_path) # `path` without trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.date(file_sub_path) + backend.date(file_sub_path) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.date(file_invalid_char) + backend.date(file_invalid_char) # --- exists --- # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.exists(file_invalid_path) + backend.exists(file_invalid_path) # `path` without trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.exists(file_sub_path) + backend.exists(file_sub_path) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.exists(file_invalid_char) + backend.exists(file_invalid_char) # --- get_archive --- # `src_path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.get_archive("/missing.txt", tmpdir) + backend.get_archive("/missing.txt", tmpdir) # `src_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.get_archive(file_invalid_path, tmpdir) + backend.get_archive(file_invalid_path, tmpdir) # `src_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.get_archive(file_sub_path, tmpdir) + backend.get_archive(file_sub_path, 
tmpdir) # `src_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.get_archive(file_invalid_char, tmpdir) + backend.get_archive(file_invalid_char, tmpdir) # `tmp_root` does not exist if platform.system() == "Windows": error_msg = "The system cannot find the path specified: 'non-existing..." else: error_msg = "No such file or directory: 'non-existing/..." with pytest.raises(FileNotFoundError, match=error_msg): - interface.get_archive(archive, tmpdir, tmp_root="non-existing") + backend.get_archive(archive, tmpdir, tmp_root="non-existing") # extension of `src_path` is not supported error_msg = "You can only extract ZIP and TAR.GZ files, ..." - interface.put_file( + backend.put_file( audeer.touch(audeer.path(tmpdir, "archive.bad")), "/archive.bad", ) with pytest.raises(RuntimeError, match=error_msg): - interface.get_archive("/archive.bad", tmpdir) + backend.get_archive("/archive.bad", tmpdir) # `src_path` is a malformed archive error_msg = "Broken archive: " - interface.put_file( + backend.put_file( audeer.touch(audeer.path(tmpdir, "malformed.zip")), "/malformed.zip", ) with pytest.raises(RuntimeError, match=error_msg): - interface.get_archive("/malformed.zip", tmpdir) + backend.get_archive("/malformed.zip", tmpdir) # no write permissions to `dst_root` if not platform.system() == "Windows": # Currently we don't know how to provoke permission error on Windows with pytest.raises(PermissionError, match=error_read_only_folder): - interface.get_archive(archive, folder_read_only) + backend.get_archive(archive, folder_read_only) # `dst_root` is not a directory with pytest.raises(NotADirectoryError, match=error_not_a_folder): - interface.get_archive(archive, local_path) + backend.get_archive(archive, local_path) # --- get_file --- # `src_path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.get_file("/missing.txt", "missing.txt") + backend.get_file("/missing.txt", "missing.txt") # 
`src_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.get_file(file_invalid_path, tmpdir) + backend.get_file(file_invalid_path, tmpdir) # `src_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.get_file(file_sub_path, tmpdir) + backend.get_file(file_sub_path, tmpdir) # `src_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.get_file(file_invalid_char, tmpdir) + backend.get_file(file_invalid_char, tmpdir) # no write permissions to `dst_path` if not platform.system() == "Windows": # Currently we don't know how to provoke permission error on Windows with pytest.raises(PermissionError, match=error_read_only_file): - interface.get_file(remote_file, file_read_only) + backend.get_file(remote_file, file_read_only) dst_path = audeer.path(folder_read_only, "file.txt") with pytest.raises(PermissionError, match=error_read_only_folder): - interface.get_file(remote_file, dst_path) + backend.get_file(remote_file, dst_path) # `dst_path` is an existing folder with pytest.raises(IsADirectoryError, match=error_is_a_folder): - interface.get_file(remote_file, local_folder) + backend.get_file(remote_file, local_folder) # --- join --- # joined path without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.join(file_invalid_path, local_file) + backend.join(file_invalid_path, local_file) # joined path contains invalid char with pytest.raises(ValueError, match=error_invalid_char): - interface.join(file_invalid_char, local_file) + backend.join(file_invalid_char, local_file) # --- ls --- # `path` does not exist with pytest.raises(audbackend.BackendError, match=error_backend): - interface.ls("/missing/") - interface.ls("/missing/", suppress_backend_errors=True) + backend.ls("/missing/") + backend.ls("/missing/", suppress_backend_errors=True) with pytest.raises(audbackend.BackendError, match=error_backend): - 
interface.ls("/missing.txt") - interface.ls("/missing.txt", suppress_backend_errors=True) + backend.ls("/missing.txt") + backend.ls("/missing.txt", suppress_backend_errors=True) remote_file_with_wrong_ext = audeer.replace_file_extension( remote_file, "missing", ) with pytest.raises(audbackend.BackendError, match=error_backend): - interface.ls(remote_file_with_wrong_ext) - interface.ls(remote_file_with_wrong_ext, suppress_backend_errors=True) + backend.ls(remote_file_with_wrong_ext) + backend.ls(remote_file_with_wrong_ext, suppress_backend_errors=True) # joined path without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.ls(file_invalid_path) + backend.ls(file_invalid_path) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.ls(file_invalid_char) + backend.ls(file_invalid_char) # --- move_file --- # `src_path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.move_file("/missing.txt", "/file.txt") + backend.move_file("/missing.txt", "/file.txt") # `src_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.move_file(file_invalid_path, "/file.txt") + backend.move_file(file_invalid_path, "/file.txt") # `src_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.move_file(file_sub_path, "/file.txt") + backend.move_file(file_sub_path, "/file.txt") # `src_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.move_file(file_invalid_char, "/file.txt") + backend.move_file(file_invalid_char, "/file.txt") # `dst_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.move_file("/file.txt", file_invalid_path) + backend.move_file("/file.txt", file_invalid_path) # `dst_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.move_file("/file.txt", file_sub_path) 
+ backend.move_file("/file.txt", file_sub_path) # `dst_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.move_file("/file.txt", file_invalid_char) - - # --- owner --- - # `path` without leading '/' - with pytest.raises(ValueError, match=error_invalid_path): - interface.owner(file_invalid_path) - # `path` without trailing '/' - with pytest.raises(ValueError, match=error_sub_path): - interface.owner(file_sub_path) - # `path` contains invalid character - with pytest.raises(ValueError, match=error_invalid_char): - interface.owner(file_invalid_char) + backend.move_file("/file.txt", file_invalid_char) # --- put_archive --- # `src_root` missing error_msg = "No such file or directory: ..." with pytest.raises(FileNotFoundError, match=error_msg): - interface.put_archive( + backend.put_archive( audeer.path(tmpdir, "/missing/"), archive, files=local_file, ) # `src_root` is not a directory with pytest.raises(NotADirectoryError, match=error_not_a_folder): - interface.put_archive(local_path, archive) + backend.put_archive(local_path, archive) # `files` missing error_msg = "No such file or directory: ..." 
with pytest.raises(FileNotFoundError, match=error_msg): - interface.put_archive(tmpdir, archive, files="missing.txt") + backend.put_archive(tmpdir, archive, files="missing.txt") + # `dst_path` no valid archive + with pytest.raises(RuntimeError, match=error_invalid_archive): + backend.put_archive( + tmpdir, + archive_invalid_type, + files=local_file, + ) # `dst_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.put_archive( + backend.put_archive( tmpdir, file_invalid_path, files=local_file, ) # `dst_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.put_archive( + backend.put_archive( tmpdir, file_sub_path, files=local_file, ) # `dst_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.put_archive( + backend.put_archive( tmpdir, file_invalid_char, files=local_file, @@ -499,50 +441,50 @@ def test_errors(tmpdir, interface): # extension of `dst_path` is not supported error_msg = "You can only create a ZIP or TAR.GZ archive, not ..." with pytest.raises(RuntimeError, match=error_msg): - interface.put_archive(tmpdir, "/archive.bad", files=local_file) + backend.put_archive(tmpdir, "/archive.bad", files=local_file) # --- put_file --- # `src_path` does not exists error_msg = "No such file or directory: ..." 
with pytest.raises(FileNotFoundError, match=error_msg): - interface.put_file( + backend.put_file( audeer.path(tmpdir, "missing.txt"), remote_file, ) # `src_path` is a folder with pytest.raises(IsADirectoryError, match=error_is_a_folder): - interface.put_file(local_folder, remote_file) + backend.put_file(local_folder, remote_file) # `dst_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.put_file(local_path, file_invalid_path) + backend.put_file(local_path, file_invalid_path) # `dst_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.put_file(local_path, file_sub_path) + backend.put_file(local_path, file_sub_path) # `dst_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.put_file(local_path, file_invalid_char) + backend.put_file(local_path, file_invalid_char) # --- remove_file --- # `path` does not exists with pytest.raises(audbackend.BackendError, match=error_backend): - interface.remove_file("/missing.txt") + backend.remove_file("/missing.txt") # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.remove_file(file_invalid_path) + backend.remove_file(file_invalid_path) # `path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.remove_file(file_sub_path) + backend.remove_file(file_sub_path) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.remove_file(file_invalid_char) + backend.remove_file(file_invalid_char) # --- split --- # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.split(file_invalid_path) + backend.split(file_invalid_path) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.split(file_invalid_char) + backend.split(file_invalid_char) @pytest.mark.parametrize( @@ -552,22 +494,15 @@ def 
test_errors(tmpdir, interface): "/folder/test.txt", ], ) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], - indirect=True, -) -def test_exists(tmpdir, path, interface): +def test_exists(tmpdir, filesystem, path): + backend = audbackend.Unversioned(filesystem) + src_path = audeer.path(tmpdir, "~") audeer.touch(src_path) - assert not interface.exists(path) - interface.put_file(src_path, path) - assert interface.exists(path) + assert not backend.exists(path) + backend.put_file(src_path, path) + assert backend.exists(path) @pytest.mark.parametrize( @@ -591,58 +526,34 @@ def test_exists(tmpdir, path, interface): ), ], ) -@pytest.mark.parametrize( - "interface, owner", - [ - ( - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - audbackend.backend.Artifactory, - ), - ( - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - audbackend.backend.FileSystem, - ), - ( - (SingleFolder, audbackend.interface.Unversioned), - SingleFolder, - ), - ], - indirect=True, -) -def test_file(tmpdir, src_path, dst_path, owner, interface): +def test_file(tmpdir, filesystem, src_path, dst_path): + backend = audbackend.Unversioned(filesystem) + src_path = audeer.path(tmpdir, src_path) audeer.mkdir(os.path.dirname(src_path)) audeer.touch(src_path) - assert not interface.exists(dst_path) - interface.put_file(src_path, dst_path) + assert not backend.exists(dst_path) + backend.put_file(src_path, dst_path) # operation will be skipped - interface.put_file(src_path, dst_path) - assert interface.exists(dst_path) + backend.put_file(src_path, dst_path) + assert backend.exists(dst_path) - interface.get_file(dst_path, src_path) + backend.get_file(dst_path, src_path) assert os.path.exists(src_path) - assert interface.checksum(dst_path) == audeer.md5(src_path) - assert 
interface.owner(dst_path) == owner + assert backend.checksum(dst_path) == audeer.md5(src_path) date = datetime.datetime.today().strftime("%Y-%m-%d") - assert interface.date(dst_path) == date + assert backend.date(dst_path) == date - interface.remove_file(dst_path) - assert not interface.exists(dst_path) + backend.remove_file(dst_path) + assert not backend.exists(dst_path) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], - indirect=True, -) -def test_ls(tmpdir, interface): - assert interface.ls() == [] - assert interface.ls("/") == [] +def test_ls(tmpdir, filesystem): + backend = audbackend.Unversioned(filesystem) + + assert backend.ls() == [] + assert backend.ls("/") == [] root = [ "/file.bar", @@ -666,7 +577,7 @@ def test_ls(tmpdir, interface): tmp_file = os.path.join(tmpdir, "~") for path in root + sub + hidden: audeer.touch(tmp_file) - interface.put_file( + backend.put_file( tmp_file, path, ) @@ -686,7 +597,7 @@ def test_ls(tmpdir, interface): ("/sub/file.foo", "*.bar", []), ("/.sub/.file.foo", None, hidden), ]: - assert interface.ls( + assert backend.ls( path, pattern=pattern, ) == sorted(expected) @@ -705,129 +616,49 @@ def test_ls(tmpdir, interface): ), ], ) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.Artifactory, audbackend.interface.Unversioned), - (audbackend.backend.FileSystem, audbackend.interface.Unversioned), - (SingleFolder, audbackend.interface.Unversioned), - ], - indirect=True, -) -def test_move(tmpdir, src_path, dst_path, interface): +def test_move(tmpdir, filesystem, src_path, dst_path): + backend = audbackend.Unversioned(filesystem) + local_path = audeer.path(tmpdir, "~") audeer.touch(local_path) # move file - interface.put_file(local_path, src_path) + backend.put_file(local_path, src_path) if dst_path != src_path: - assert not 
interface.exists(dst_path) - interface.move_file(src_path, dst_path) + assert not backend.exists(dst_path) + backend.move_file(src_path, dst_path) if dst_path != src_path: - assert not interface.exists(src_path) - assert interface.exists(dst_path) + assert not backend.exists(src_path) + assert backend.exists(dst_path) # move file again with same checksum - interface.put_file(local_path, src_path) + backend.put_file(local_path, src_path) - interface.move_file(src_path, dst_path) + backend.move_file(src_path, dst_path) if dst_path != src_path: - assert not interface.exists(src_path) - assert interface.exists(dst_path) + assert not backend.exists(src_path) + assert backend.exists(dst_path) # move file again with different checksum with open(local_path, "w") as fp: fp.write("different checksum") - interface.put_file(local_path, src_path) + backend.put_file(local_path, src_path) if dst_path != src_path: - assert audeer.md5(local_path) != interface.checksum(dst_path) - interface.move_file(src_path, dst_path) - assert audeer.md5(local_path) == interface.checksum(dst_path) - - # clean up - - interface.remove_file(dst_path) + assert audeer.md5(local_path) != backend.checksum(dst_path) + backend.move_file(src_path, dst_path) + assert audeer.md5(local_path) == backend.checksum(dst_path) -def test_repr(): - interface = audbackend.interface.Unversioned( - audbackend.backend.FileSystem("host", "repo") - ) - assert interface.__repr__() == ( - "audbackend.interface.Unversioned(" - "audbackend.backend.FileSystem('host', 'repo')" - ")" - ) - - -def test_validate(tmpdir): - class BadChecksumBackend(audbackend.backend.FileSystem): - r"""Return random checksum.""" - - def _checksum( - self, - path: str, - ) -> str: - return "".join( - random.choices( - string.ascii_uppercase + string.digits, - k=33, - ) - ) - - path = audeer.touch(tmpdir, "~.txt") - error_msg = "Execution is interrupted because" - - audbackend.backend.FileSystem.create(tmpdir, "repo") - file_system_backend = 
audbackend.backend.FileSystem(tmpdir, "repo") - file_system_backend.open() - bad_checksum_backend = BadChecksumBackend(tmpdir, "repo") - bad_checksum_backend.open() - - interface = audbackend.interface.Unversioned(file_system_backend) - interface_bad = audbackend.interface.Unversioned(bad_checksum_backend) - - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.put_file(path, "/remote.txt", validate=True) - assert not interface.exists("/remote.txt") - interface.put_file(path, "/remote.txt", validate=True) - assert interface.exists("/remote.txt") - - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.get_file("/remote.txt", "local.txt", validate=True) - assert not os.path.exists("local.txt") - interface.get_file("/remote.txt", "local.txt", validate=True) - assert os.path.exists("local.txt") - - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.copy_file("/remote.txt", "/copy.txt", validate=True) - assert not interface.exists("/copy.txt") - interface.copy_file("/remote.txt", "/copy.txt", validate=True) - assert interface.exists("/copy.txt") - - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.move_file("/remote.txt", "/move.txt", validate=True) - assert not interface.exists("/move.txt") - assert interface.exists("/remote.txt") - interface.move_file("/remote.txt", "/move.txt", validate=True) - assert interface.exists("/move.txt") - assert not interface.exists("/remote.txt") - - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.put_archive(tmpdir, "/remote.zip", validate=True) - assert not interface.exists("/remote.zip") - interface.put_archive(".", "/remote.zip", validate=True) - assert interface.exists("/remote.zip") - - dst_root = os.path.join(tmpdir, "extract") - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.get_archive("/remote.zip", dst_root, validate=True) - assert not os.path.exists(dst_root) - interface.get_archive("/remote.zip", 
dst_root, validate=True) - assert os.path.exists(dst_root) +@pytest.mark.parametrize( + "expected", + ["audbackend.Unversioned(DirFileSystem)"], +) +def test_repr(filesystem, expected): + backend = audbackend.Unversioned(filesystem) + assert repr(backend) == expected diff --git a/tests/test_validate.py b/tests/test_validate.py new file mode 100644 index 00000000..c72a0f09 --- /dev/null +++ b/tests/test_validate.py @@ -0,0 +1,104 @@ +import os +import random +import string + +import pytest + +import audeer + +import audbackend + + +# def test_validate(tmpdir, filesystem): + + +@pytest.mark.parametrize("error_msg", ["Execution is interrupted because"]) +class TestValidate: + """Test methods that provide validation. + + We inject a broken `checksum()` method + into one backend + to force the validation to go always wrong + on that backend. + + Args: + tmpdir: tmpdir fixture + filesystem: filesystemn fixture + + """ + + @classmethod + @pytest.fixture(scope="function", autouse=True) + def setup(cls, tmpdir, filesystem, monkeypatch): + """Instantiate backend objects and local file.""" + cls.backend = audbackend.Unversioned(filesystem) + + def random_checksum(path: str) -> str: + r"""Return random checksum.""" + return "".join( + random.choices( + string.ascii_uppercase + string.digits, + k=33, + ) + ) + + cls.broken_backend = audbackend.Unversioned(filesystem) + cls.broken_backend._checksum = random_checksum + cls.local_file = audeer.touch(tmpdir, "~.txt") + + working_dir = audeer.mkdir(tmpdir, "work") + monkeypatch.chdir(working_dir) + + def test_put_file(self, error_msg): + """Test validate for put_file.""" + with pytest.raises(InterruptedError, match=error_msg): + self.broken_backend.put_file(self.local_file, "/remote.txt", validate=True) + assert not self.backend.exists("/remote.txt") + self.backend.put_file(self.local_file, "/remote.txt", validate=True) + assert self.backend.exists("/remote.txt") + + def test_get_file(self, error_msg): + """Test validate for 
get_file.""" + self.backend.put_file(self.local_file, "/remote.txt", validate=True) + with pytest.raises(InterruptedError, match=error_msg): + self.broken_backend.get_file("/remote.txt", "local.txt", validate=True) + assert not os.path.exists("local.txt") + self.backend.get_file("/remote.txt", "local.txt", validate=True) + assert os.path.exists("local.txt") + + def test_copy_file(self, error_msg): + """Test validate for copy_file.""" + self.backend.put_file(self.local_file, "/remote.txt", validate=True) + with pytest.raises(InterruptedError, match=error_msg): + self.broken_backend.copy_file("/remote.txt", "/copy.txt", validate=True) + assert not self.backend.exists("/copy.txt") + self.backend.copy_file("/remote.txt", "/copy.txt", validate=True) + assert self.backend.exists("/copy.txt") + + def test_move_file(self, error_msg): + """Test validate for move_file.""" + self.backend.put_file(self.local_file, "/remote.txt", validate=True) + with pytest.raises(InterruptedError, match=error_msg): + self.broken_backend.move_file("/remote.txt", "/move.txt", validate=True) + assert not self.backend.exists("/move.txt") + assert self.backend.exists("/remote.txt") + self.backend.move_file("/remote.txt", "/move.txt", validate=True) + assert self.backend.exists("/move.txt") + assert not self.backend.exists("/remote.txt") + + def test_put_archive(self, error_msg): + """Test validate for put_archive.""" + with pytest.raises(InterruptedError, match=error_msg): + self.broken_backend.put_archive(".", "/remote.zip", validate=True) + assert not self.backend.exists("/remote.zip") + self.backend.put_archive(".", "/remote.zip", validate=True) + assert self.backend.exists("/remote.zip") + + def test_get_archive(self, error_msg): + """Test validate for get_archive.""" + self.backend.put_archive(".", "/remote.zip", validate=True) + with pytest.raises(InterruptedError, match=error_msg): + self.broken_backend.get_archive("/remote.zip", "./extract", validate=True) + assert not 
os.path.exists("./extract") + self.backend.get_archive("/remote.zip", "./extract", validate=True) + assert os.path.exists("./extract") diff --git a/tests/test_interface_versioned.py b/tests/test_versioned.py similarity index 59% rename from tests/test_interface_versioned.py rename to tests/test_versioned.py index f42c0cd6..5a2f815f 100644 --- a/tests/test_interface_versioned.py +++ b/tests/test_versioned.py @@ -1,41 +1,15 @@ import datetime import os import platform -import random import re import stat -import string import pytest import audeer import audbackend - -from singlefolder import SingleFolder - - -@pytest.fixture(scope="function", autouse=False) -def tree(tmpdir, request): - r"""Create file tree.""" - files = request.param - paths = [] - - for path in files: - if os.name == "nt": - path = path.replace("/", os.path.sep) - if path.endswith(os.path.sep): - path = audeer.path(tmpdir, path) - path = audeer.mkdir(path) - path = path + os.path.sep - paths.append(path) - else: - path = audeer.path(tmpdir, path) - audeer.mkdir(os.path.dirname(path)) - path = audeer.touch(path) - paths.append(path) - - yield paths +from tests.conftest import create_file_tree @pytest.mark.parametrize( @@ -84,19 +58,16 @@ def tree(tmpdir, request): ["dir/to/file.ext", "file.ext"], ), ], - indirect=["tree"], ) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], - indirect=True, -) -def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): +def test_archive(tmpdir, filesystem, tree, archive, files, tmp_root, expected): + backend = audbackend.Versioned(filesystem) version = "1.0.0" + src_dir = audeer.mkdir(tmpdir, "src") + dst_dir = audeer.mkdir(tmpdir, "dst") + + create_file_tree(src_dir, tree) + if tmp_root is not None: tmp_root = audeer.path(tmpdir, tmp_root) @@ -109,8 +80,8 @@ def test_archive(tmpdir, tree, archive, files, tmp_root, interface, 
expected): if os.path.exists(tmp_root): os.removedirs(tmp_root) with pytest.raises(FileNotFoundError): - interface.put_archive( - tmpdir, + backend.put_archive( + src_dir, archive, version, files=files, @@ -118,22 +89,22 @@ def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): ) audeer.mkdir(tmp_root) - interface.put_archive( - tmpdir, + backend.put_archive( + src_dir, archive, version, files=files, tmp_root=tmp_root, ) # operation will be skipped - interface.put_archive( - tmpdir, + backend.put_archive( + src_dir, archive, version, files=files, tmp_root=tmp_root, ) - assert interface.exists(archive, version) + assert backend.exists(archive, version) # if a tmp_root is given but does not exist, # get_archive() should fail @@ -141,18 +112,18 @@ def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): if os.path.exists(tmp_root): os.removedirs(tmp_root) with pytest.raises(FileNotFoundError): - interface.get_archive( + backend.get_archive( archive, - tmpdir, + dst_dir, version, tmp_root=tmp_root, ) audeer.mkdir(tmp_root) assert ( - interface.get_archive( + backend.get_archive( archive, - tmpdir, + dst_dir, version, tmp_root=tmp_root, ) @@ -175,19 +146,10 @@ def test_archive(tmpdir, tree, archive, files, tmp_root, interface, expected): ), ], ) -@pytest.mark.parametrize( - "version", - [None, "2.0.0"], -) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], - indirect=True, -) -def test_copy(tmpdir, src_path, src_versions, dst_path, version, interface): +@pytest.mark.parametrize("version", [None, "2.0.0"]) +def test_copy(tmpdir, filesystem, src_path, src_versions, dst_path, version): + backend = audbackend.Versioned(filesystem) + if version is None: dst_versions = src_versions else: @@ -196,18 +158,18 @@ def test_copy(tmpdir, src_path, src_versions, dst_path, version, interface): local_path = 
audeer.path(tmpdir, "~") audeer.touch(local_path) for v in src_versions: - interface.put_file(local_path, src_path, v) + backend.put_file(local_path, src_path, v) # copy file if dst_path != src_path: for v in dst_versions: - assert not interface.exists(dst_path, v) - interface.copy_file(src_path, dst_path, version=version) + assert not backend.exists(dst_path, v) + backend.copy_file(src_path, dst_path, version=version) for v in src_versions: - assert interface.exists(src_path, v) + assert backend.exists(src_path, v) for v in dst_versions: - assert interface.exists(dst_path, v) + assert backend.exists(dst_path, v) # copy file again with different checksum @@ -215,44 +177,39 @@ def test_copy(tmpdir, src_path, src_versions, dst_path, version, interface): fp.write("different checksum") for v in src_versions: - assert audeer.md5(local_path) != interface.checksum(src_path, v) - interface.put_file(local_path, src_path, v) - assert audeer.md5(local_path) == interface.checksum(src_path, v) + assert audeer.md5(local_path) != backend.checksum(src_path, v) + backend.put_file(local_path, src_path, v) + assert audeer.md5(local_path) == backend.checksum(src_path, v) if dst_path != src_path: for v in dst_versions: - assert audeer.md5(local_path) != interface.checksum(dst_path, v) - interface.copy_file(src_path, dst_path, version=version) + assert audeer.md5(local_path) != backend.checksum(dst_path, v) + backend.copy_file(src_path, dst_path, version=version) for v in dst_versions: - assert audeer.md5(local_path) == interface.checksum(dst_path, v) + assert audeer.md5(local_path) == backend.checksum(dst_path, v) # clean up for v in src_versions: - interface.remove_file(src_path, v) + backend.remove_file(src_path, v) if dst_path != src_path: for v in dst_versions: - interface.remove_file(dst_path, v) + backend.remove_file(dst_path, v) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, 
audbackend.interface.Versioned), - ], - indirect=True, -) -def test_errors(tmpdir, interface): +def test_errors(tmpdir, filesystem): + backend = audbackend.Versioned(filesystem) # Ensure we have one file and one archive published on the backend archive = "/archive.zip" local_file = "file.txt" - local_path = audeer.touch(audeer.path(tmpdir, local_file)) + local_path = audeer.path(audeer.path(tmpdir, local_file)) + with open(local_path, "w") as fp: + fp.write("Text") local_folder = audeer.mkdir(audeer.path(tmpdir, "folder")) remote_file = f"/{local_file}" version = "1.0.0" - interface.put_file(local_path, remote_file, version) - interface.put_archive(tmpdir, archive, version, files=[local_file]) + backend.put_file(local_path, remote_file, version) + backend.put_archive(tmpdir, archive, version, files=[local_file]) # Create local read-only file and folder file_read_only = audeer.touch(audeer.path(tmpdir, "read-only-file.txt")) @@ -300,105 +257,105 @@ def test_errors(tmpdir, interface): # --- checksum --- # `path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.checksum("/missing.txt", version) + backend.checksum("/missing.txt", version) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.checksum(file_invalid_char, version) + backend.checksum(file_invalid_char, version) # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_char): - interface.checksum(file_invalid_char, version) + backend.checksum(file_invalid_char, version) # `path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.checksum(file_sub_path, version) + backend.checksum(file_sub_path, version) # invalid version with pytest.raises(ValueError, match=error_empty_version): - interface.checksum(remote_file, empty_version) + backend.checksum(remote_file, empty_version) with pytest.raises(ValueError, match=error_invalid_version): - interface.checksum(remote_file, 
invalid_version) + backend.checksum(remote_file, invalid_version) # --- copy_file --- # `src_path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.copy_file("/missing.txt", "/file.txt") + backend.copy_file("/missing.txt", "/file.txt") # `src_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.copy_file(file_invalid_path, "/file.txt") + backend.copy_file(file_invalid_path, "/file.txt") # `src_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.copy_file(file_invalid_char, "/file.txt") + backend.copy_file(file_invalid_char, "/file.txt") # `src_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.copy_file(file_sub_path, "/file.txt") + backend.copy_file(file_sub_path, "/file.txt") # `dst_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.copy_file("/file.txt", file_invalid_path) + backend.copy_file("/file.txt", file_invalid_path) # `dst_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.copy_file("/file.txt", file_sub_path) + backend.copy_file("/file.txt", file_sub_path) # `dst_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.copy_file("/file.txt", file_invalid_char) + backend.copy_file("/file.txt", file_invalid_char) # invalid version with pytest.raises(ValueError, match=error_empty_version): - interface.copy_file(remote_file, "/file.txt", version=empty_version) + backend.copy_file(remote_file, "/file.txt", version=empty_version) # --- date --- # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.date(file_invalid_path, version) + backend.date(file_invalid_path, version) # `path` without trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.date(file_sub_path, version) + 
backend.date(file_sub_path, version) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.date(file_invalid_char, version) + backend.date(file_invalid_char, version) # invalid version with pytest.raises(ValueError, match=error_empty_version): - interface.date(remote_file, empty_version) + backend.date(remote_file, empty_version) with pytest.raises(ValueError, match=error_invalid_version): - interface.date(remote_file, invalid_version) + backend.date(remote_file, invalid_version) # --- exists --- # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.exists(file_invalid_path, version) + backend.exists(file_invalid_path, version) # `path` without trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.exists(file_sub_path, version) + backend.exists(file_sub_path, version) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.exists(file_invalid_char, version) + backend.exists(file_invalid_char, version) # invalid version with pytest.raises(ValueError, match=error_empty_version): - interface.exists(remote_file, empty_version) + backend.exists(remote_file, empty_version) with pytest.raises(ValueError, match=error_invalid_version): - interface.exists(remote_file, invalid_version) + backend.exists(remote_file, invalid_version) # --- get_archive --- # `src_path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.get_archive("/missing.txt", tmpdir, version) + backend.get_archive("/missing.txt", tmpdir, version) # `src_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.get_archive(file_invalid_path, tmpdir, version) + backend.get_archive(file_invalid_path, tmpdir, version) # `src_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.get_archive(file_sub_path, tmpdir, version) + 
backend.get_archive(file_sub_path, tmpdir, version) # `src_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.get_archive(file_invalid_char, tmpdir, version) + backend.get_archive(file_invalid_char, tmpdir, version) # invalid version with pytest.raises(ValueError, match=error_empty_version): - interface.get_archive(archive, tmpdir, empty_version) + backend.get_archive(archive, tmpdir, empty_version) with pytest.raises(ValueError, match=error_invalid_version): - interface.get_archive(archive, tmpdir, invalid_version) + backend.get_archive(archive, tmpdir, invalid_version) # `tmp_root` does not exist if platform.system() == "Windows": error_msg = "The system cannot find the path specified: 'non-existing..." else: error_msg = "No such file or directory: 'non-existing/..." with pytest.raises(FileNotFoundError, match=error_msg): - interface.get_archive( + backend.get_archive( archive, tmpdir, version, @@ -406,152 +363,136 @@ def test_errors(tmpdir, interface): ) # extension of `src_path` is not supported error_msg = "You can only extract ZIP and TAR.GZ files, ..." 
- interface.put_file( + backend.put_file( audeer.touch(audeer.path(tmpdir, "archive.bad")), "/archive.bad", version, ) with pytest.raises(RuntimeError, match=error_msg): - interface.get_archive("/archive.bad", tmpdir, version) + backend.get_archive("/archive.bad", tmpdir, version) # `src_path` is a malformed archive error_msg = "Broken archive: " - interface.put_file( + backend.put_file( audeer.touch(audeer.path(tmpdir, "malformed.zip")), "/malformed.zip", version, ) with pytest.raises(RuntimeError, match=error_msg): - interface.get_archive("/malformed.zip", tmpdir, version) + backend.get_archive("/malformed.zip", tmpdir, version) # no write permissions to `dst_root` if not platform.system() == "Windows": # Currently we don't know how to provoke permission error on Windows with pytest.raises(PermissionError, match=error_read_only_folder): - interface.get_archive(archive, folder_read_only, version) + backend.get_archive(archive, folder_read_only, version) # `dst_root` is not a directory with pytest.raises(NotADirectoryError, match=error_not_a_folder): - interface.get_archive(archive, local_path, version) + backend.get_archive(archive, local_path, version) # --- get_file --- # `src_path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.get_file("/missing.txt", "missing.txt", version) + backend.get_file("/missing.txt", "missing.txt", version) # `src_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.get_file(file_invalid_path, tmpdir, version) + backend.get_file(file_invalid_path, tmpdir, version) # `src_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.get_file(file_sub_path, tmpdir, version) + backend.get_file(file_sub_path, tmpdir, version) # `src_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.get_file(file_invalid_char, tmpdir, version) + backend.get_file(file_invalid_char, tmpdir, 
version) # invalid version with pytest.raises(ValueError, match=error_empty_version): - interface.get_file(remote_file, local_file, empty_version) + backend.get_file(remote_file, local_file, empty_version) with pytest.raises(ValueError, match=error_invalid_version): - interface.get_file(remote_file, local_file, invalid_version) + backend.get_file(remote_file, local_file, invalid_version) # no write permissions to `dst_path` if not platform.system() == "Windows": # Currently we don't know how to provoke permission error on Windows with pytest.raises(PermissionError, match=error_read_only_file): - interface.get_file(remote_file, file_read_only, version) + backend.get_file(remote_file, file_read_only, version) dst_path = audeer.path(folder_read_only, "file.txt") with pytest.raises(PermissionError, match=error_read_only_folder): - interface.get_file(remote_file, dst_path, version) + backend.get_file(remote_file, dst_path, version) # `dst_path` is an existing folder with pytest.raises(IsADirectoryError, match=error_is_a_folder): - interface.get_file(remote_file, local_folder, version) + backend.get_file(remote_file, local_folder, version) # --- join --- # joined path without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.join(file_invalid_path, local_file) + backend.join(file_invalid_path, local_file) # joined path contains invalid char with pytest.raises(ValueError, match=error_invalid_char): - interface.join(file_invalid_char, local_file) + backend.join(file_invalid_char, local_file) # --- latest_version --- # `path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.latest_version("/missing.txt") + backend.latest_version("/missing.txt") # path without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.latest_version(file_invalid_path) + backend.latest_version(file_invalid_path) # path with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - 
interface.latest_version(file_sub_path) + backend.latest_version(file_sub_path) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.latest_version(file_invalid_char) + backend.latest_version(file_invalid_char) # --- ls --- # `path` does not exist with pytest.raises(audbackend.BackendError, match=error_backend): - interface.ls("/missing/") - interface.ls("/missing/", suppress_backend_errors=True) + backend.ls("/missing/") + backend.ls("/missing/", suppress_backend_errors=True) with pytest.raises(audbackend.BackendError, match=error_backend): - interface.ls("/missing.txt") - interface.ls("/missing.txt", suppress_backend_errors=True) + backend.ls("/missing.txt") + backend.ls("/missing.txt", suppress_backend_errors=True) remote_file_with_wrong_ext = audeer.replace_file_extension( remote_file, "missing", ) with pytest.raises(audbackend.BackendError, match=error_backend): - interface.ls(remote_file_with_wrong_ext) - interface.ls(remote_file_with_wrong_ext, suppress_backend_errors=True) + backend.ls(remote_file_with_wrong_ext) + backend.ls(remote_file_with_wrong_ext, suppress_backend_errors=True) # joined path without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.ls(file_invalid_path) + backend.ls(file_invalid_path) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.ls(file_invalid_char) + backend.ls(file_invalid_char) # --- move_file --- # `src_path` missing with pytest.raises(audbackend.BackendError, match=error_backend): - interface.move_file("/missing.txt", "/file.txt") + backend.move_file("/missing.txt", "/file.txt") # `src_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.move_file(file_invalid_path, "/file.txt") + backend.move_file(file_invalid_path, "/file.txt") # `src_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - 
interface.move_file(file_sub_path, "/file.txt") + backend.move_file(file_sub_path, "/file.txt") # `src_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.move_file(file_invalid_char, "/file.txt") + backend.move_file(file_invalid_char, "/file.txt") # `dst_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.move_file("/file.txt", file_invalid_path) + backend.move_file("/file.txt", file_invalid_path) # `dst_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.move_file("/file.txt", file_sub_path) + backend.move_file("/file.txt", file_sub_path) # `dst_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.move_file("/file.txt", file_invalid_char) - # invalid version - with pytest.raises(ValueError, match=error_empty_version): - interface.move_file(remote_file, "/file.txt", version=empty_version) - - # --- owner --- - # `path` without leading '/' - with pytest.raises(ValueError, match=error_invalid_path): - interface.owner(file_invalid_path, version) - # `path` without trailing '/' - with pytest.raises(ValueError, match=error_sub_path): - interface.owner(file_sub_path, version) - # `path` contains invalid character - with pytest.raises(ValueError, match=error_invalid_char): - interface.owner(file_invalid_char, version) + backend.move_file("/file.txt", file_invalid_char) # invalid version with pytest.raises(ValueError, match=error_empty_version): - interface.owner(remote_file, empty_version) - with pytest.raises(ValueError, match=error_invalid_version): - interface.owner(remote_file, invalid_version) + backend.move_file(remote_file, "/file.txt", version=empty_version) # --- put_archive --- # `src_root` missing error_msg = "No such file or directory: ..." 
with pytest.raises(FileNotFoundError, match=error_msg): - interface.put_archive( + backend.put_archive( audeer.path(tmpdir, "/missing/"), archive, version, @@ -559,14 +500,14 @@ def test_errors(tmpdir, interface): ) # `src_root` is not a directory with pytest.raises(NotADirectoryError, match=error_not_a_folder): - interface.put_archive(local_path, archive, version) + backend.put_archive(local_path, archive, version) # `files` missing error_msg = "No such file or directory: ..." with pytest.raises(FileNotFoundError, match=error_msg): - interface.put_archive(tmpdir, archive, version, files="missing.txt") + backend.put_archive(tmpdir, archive, version, files="missing.txt") # `dst_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.put_archive( + backend.put_archive( tmpdir, file_invalid_path, version, @@ -574,7 +515,7 @@ def test_errors(tmpdir, interface): ) # `dst_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.put_archive( + backend.put_archive( tmpdir, file_sub_path, version, @@ -582,7 +523,7 @@ def test_errors(tmpdir, interface): ) # `dst_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.put_archive( + backend.put_archive( tmpdir, file_invalid_char, version, @@ -590,13 +531,13 @@ def test_errors(tmpdir, interface): ) # invalid version with pytest.raises(ValueError, match=error_empty_version): - interface.put_archive(tmpdir, archive, empty_version) + backend.put_archive(tmpdir, archive, empty_version) with pytest.raises(ValueError, match=error_invalid_version): - interface.put_archive(tmpdir, archive, invalid_version) + backend.put_archive(tmpdir, archive, invalid_version) # extension of `dst_path` is not supported error_msg = "You can only create a ZIP or TAR.GZ archive, not ..." 
with pytest.raises(RuntimeError, match=error_msg): - interface.put_archive( + backend.put_archive( tmpdir, "/archive.bad", version, @@ -607,66 +548,66 @@ def test_errors(tmpdir, interface): # `src_path` does not exists error_msg = "No such file or directory: ..." with pytest.raises(FileNotFoundError, match=error_msg): - interface.put_file( + backend.put_file( audeer.path(tmpdir, "missing.txt"), remote_file, version, ) # `src_path` is a folder with pytest.raises(IsADirectoryError, match=error_is_a_folder): - interface.put_file(local_folder, remote_file, version) + backend.put_file(local_folder, remote_file, version) # `dst_path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.put_file(local_path, file_invalid_path, version) + backend.put_file(local_path, file_invalid_path, version) # `dst_path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.put_file(local_path, file_sub_path, version) + backend.put_file(local_path, file_sub_path, version) # `dst_path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.put_file(local_path, file_invalid_char, version) + backend.put_file(local_path, file_invalid_char, version) # invalid version with pytest.raises(ValueError, match=error_empty_version): - interface.put_file(local_path, remote_file, empty_version) + backend.put_file(local_path, remote_file, empty_version) with pytest.raises(ValueError, match=error_invalid_version): - interface.put_file(local_path, remote_file, invalid_version) + backend.put_file(local_path, remote_file, invalid_version) # --- remove_file --- # `path` does not exists with pytest.raises(audbackend.BackendError, match=error_backend): - interface.remove_file("/missing.txt", version) + backend.remove_file("/missing.txt", version) # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.remove_file(file_invalid_path, version) + 
backend.remove_file(file_invalid_path, version) # `path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.remove_file(file_sub_path, version) + backend.remove_file(file_sub_path, version) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.remove_file(file_invalid_char, version) + backend.remove_file(file_invalid_char, version) # invalid version with pytest.raises(ValueError, match=error_empty_version): - interface.remove_file(remote_file, empty_version) + backend.remove_file(remote_file, empty_version) with pytest.raises(ValueError, match=error_invalid_version): - interface.remove_file(remote_file, invalid_version) + backend.remove_file(remote_file, invalid_version) # --- split --- # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.split(file_invalid_path) + backend.split(file_invalid_path) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.split(file_invalid_char) + backend.split(file_invalid_char) # --- versions --- # `path` without leading '/' with pytest.raises(ValueError, match=error_invalid_path): - interface.versions(file_invalid_path) + backend.versions(file_invalid_path) # `path` with trailing '/' with pytest.raises(ValueError, match=error_sub_path): - interface.versions(file_sub_path) + backend.versions(file_sub_path) # `path` contains invalid character with pytest.raises(ValueError, match=error_invalid_char): - interface.versions(file_invalid_char) + backend.versions(file_invalid_char) @pytest.mark.parametrize( @@ -676,21 +617,14 @@ def test_errors(tmpdir, interface): ("/folder/test.txt", "1.0.0"), ], ) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], - indirect=True, -) -def test_exists(tmpdir, path, version, interface): +def test_exists(tmpdir, 
filesystem, path, version): + backend = audbackend.Versioned(filesystem) src_path = audeer.path(tmpdir, "~") audeer.touch(src_path) - assert not interface.exists(path, version) - interface.put_file(src_path, path, version) - assert interface.exists(path, version) + assert not backend.exists(path, version) + backend.put_file(src_path, path, version) + assert backend.exists(path, version) @pytest.mark.parametrize( @@ -718,50 +652,28 @@ def test_exists(tmpdir, path, version, interface): ), ], ) -@pytest.mark.parametrize( - "interface, owner", - [ - ( - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - audbackend.backend.FileSystem, - ), - ( - (SingleFolder, audbackend.interface.Versioned), - SingleFolder, - ), - ], - indirect=True, -) -def test_file(tmpdir, src_path, dst_path, version, interface, owner): +def test_file(tmpdir, filesystem, src_path, dst_path, version): + backend = audbackend.Versioned(filesystem) src_path = audeer.path(tmpdir, src_path) audeer.mkdir(os.path.dirname(src_path)) audeer.touch(src_path) - assert not interface.exists(dst_path, version) - interface.put_file(src_path, dst_path, version) + assert not backend.exists(dst_path, version) + backend.put_file(src_path, dst_path, version) # operation will be skipped - interface.put_file(src_path, dst_path, version) - assert interface.exists(dst_path, version) + backend.put_file(src_path, dst_path, version) + assert backend.exists(dst_path, version) - interface.get_file(dst_path, src_path, version) + backend.get_file(dst_path, src_path, version) assert os.path.exists(src_path) - assert interface.checksum(dst_path, version) == audeer.md5(src_path) - assert interface.owner(dst_path, version) == owner + assert backend.checksum(dst_path, version) == audeer.md5(src_path) date = datetime.datetime.today().strftime("%Y-%m-%d") - assert interface.date(dst_path, version) == date + assert backend.date(dst_path, version) == date - interface.remove_file(dst_path, version) - assert not 
interface.exists(dst_path, version) + backend.remove_file(dst_path, version) + assert not backend.exists(dst_path, version) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], - indirect=True, -) @pytest.mark.parametrize( "files", [ @@ -997,17 +909,18 @@ def test_file(tmpdir, src_path, dst_path, version, interface, owner): ), ], ) -def test_ls(tmpdir, interface, files, path, latest, pattern, expected): - assert interface.ls() == [] - assert interface.ls("/") == [] +def test_ls(tmpdir, filesystem, files, path, latest, pattern, expected): + backend = audbackend.Versioned(filesystem) + assert backend.ls() == [] + assert backend.ls("/") == [] # create content tmp_file = audeer.touch(tmpdir, "~") for file_path, file_version in files: - interface.put_file(tmp_file, file_path, file_version) + backend.put_file(tmp_file, file_path, file_version) # test - assert interface.ls( + assert backend.ls( path, latest_version=latest, pattern=pattern, @@ -1029,19 +942,9 @@ def test_ls(tmpdir, interface, files, path, latest, pattern, expected): ), ], ) -@pytest.mark.parametrize( - "version", - [None, "2.0.0"], -) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], - indirect=True, -) -def test_move(tmpdir, src_path, src_versions, dst_path, version, interface): +@pytest.mark.parametrize("version", [None, "2.0.0"]) +def test_move(tmpdir, filesystem, src_path, src_versions, dst_path, version): + backend = audbackend.Versioned(filesystem) if version is None: dst_versions = src_versions else: @@ -1053,29 +956,29 @@ def test_move(tmpdir, src_path, src_versions, dst_path, version, interface): # move file for v in src_versions: - interface.put_file(local_path, src_path, v) + backend.put_file(local_path, src_path, v) if dst_path != src_path: for v in dst_versions: - 
assert not interface.exists(dst_path, v) - interface.move_file(src_path, dst_path, version=version) + assert not backend.exists(dst_path, v) + backend.move_file(src_path, dst_path, version=version) if dst_path != src_path: for v in dst_versions: - assert not interface.exists(src_path, v) + assert not backend.exists(src_path, v) for v in dst_versions: - assert interface.exists(dst_path, v) + assert backend.exists(dst_path, v) # move file again with same checksum for v in src_versions: - interface.put_file(local_path, src_path, v) + backend.put_file(local_path, src_path, v) - interface.move_file(src_path, dst_path, version=version) + backend.move_file(src_path, dst_path, version=version) if dst_path != src_path: for v in dst_versions: - assert not interface.exists(src_path, v) + assert not backend.exists(src_path, v) for v in dst_versions: - assert interface.exists(dst_path, v) + assert backend.exists(dst_path, v) # move file again with different checksum @@ -1083,179 +986,56 @@ def test_move(tmpdir, src_path, src_versions, dst_path, version, interface): fp.write("different checksum") for v in src_versions: - interface.put_file(local_path, src_path, v) + backend.put_file(local_path, src_path, v) if dst_path != src_path: for v in dst_versions: - assert audeer.md5(local_path) != interface.checksum(dst_path, v) - interface.move_file(src_path, dst_path, version=version) + assert audeer.md5(local_path) != backend.checksum(dst_path, v) + backend.move_file(src_path, dst_path, version=version) for v in dst_versions: - assert audeer.md5(local_path) == interface.checksum(dst_path, v) + assert audeer.md5(local_path) == backend.checksum(dst_path, v) # clean up for v in dst_versions: - interface.remove_file(dst_path, v) + backend.remove_file(dst_path, v) -def test_repr(): - interface = audbackend.interface.Versioned( - audbackend.backend.FileSystem("host", "repo") - ) - assert interface.__repr__() == ( - "audbackend.interface.Versioned(audbackend.backend.FileSystem('host', 
'repo'))" - ) +@pytest.mark.parametrize( + "expected", + ["audbackend.Versioned(DirFileSystem)"], +) +def test_repr(filesystem, expected): + backend = audbackend.Versioned(filesystem) + assert repr(backend) == expected @pytest.mark.parametrize("dst_path", ["/file.ext", "/sub/file.ext"]) -@pytest.mark.parametrize( - "interface", - [ - (audbackend.backend.FileSystem, audbackend.interface.Versioned), - (SingleFolder, audbackend.interface.Versioned), - ], - indirect=True, -) -def test_versions(tmpdir, dst_path, interface): +def test_versions(tmpdir, filesystem, dst_path): + backend = audbackend.Versioned(filesystem) src_path = audeer.path(tmpdir, "~") audeer.touch(src_path) # empty backend with pytest.raises(audbackend.BackendError): - interface.versions(dst_path) - assert not interface.versions(dst_path, suppress_backend_errors=True) + backend.versions(dst_path) + assert not backend.versions(dst_path, suppress_backend_errors=True) with pytest.raises(audbackend.BackendError): - interface.latest_version(dst_path) + backend.latest_version(dst_path) # v1 - interface.put_file(src_path, dst_path, "1.0.0") - assert interface.versions(dst_path) == ["1.0.0"] - assert interface.latest_version(dst_path) == "1.0.0" + backend.put_file(src_path, dst_path, "1.0.0") + assert backend.versions(dst_path) == ["1.0.0"] + assert backend.latest_version(dst_path) == "1.0.0" # v2 - interface.put_file(src_path, dst_path, "2.0.0") - assert interface.versions(dst_path) == ["1.0.0", "2.0.0"] - assert interface.latest_version(dst_path) == "2.0.0" + backend.put_file(src_path, dst_path, "2.0.0") + assert backend.versions(dst_path) == ["1.0.0", "2.0.0"] + assert backend.latest_version(dst_path) == "2.0.0" # v3 with a different extension other_ext = "other" other_remote_file = audeer.replace_file_extension(dst_path, other_ext) - interface.put_file(src_path, other_remote_file, "3.0.0") - assert interface.versions(dst_path) == ["1.0.0", "2.0.0"] - assert interface.latest_version(dst_path) == "2.0.0" - - 
-def test_validate(tmpdir): - class BadChecksumBackend(audbackend.backend.FileSystem): - r"""Return random checksum.""" - - def _checksum( - self, - path: str, - ) -> str: - return "".join( - random.choices( - string.ascii_uppercase + string.digits, - k=33, - ) - ) - - path = audeer.touch(tmpdir, "~.txt") - error_msg = "Execution is interrupted because" - - audbackend.backend.FileSystem.create(tmpdir, "repo") - file_system_backend = audbackend.backend.FileSystem(tmpdir, "repo") - file_system_backend.open() - bad_checksum_backend = BadChecksumBackend(tmpdir, "repo") - bad_checksum_backend.open() - - interface = audbackend.interface.Versioned(file_system_backend) - interface_bad = audbackend.interface.Versioned(bad_checksum_backend) - - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.put_file(path, "/remote.txt", "1.0.0", validate=True) - assert not interface.exists("/remote.txt", "1.0.0") - interface.put_file(path, "/remote.txt", "1.0.0", validate=True) - assert interface.exists("/remote.txt", "1.0.0") - - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.get_file( - "/remote.txt", - "local.txt", - "1.0.0", - validate=True, - ) - assert not os.path.exists("local.txt") - interface.get_file( - "/remote.txt", - "local.txt", - "1.0.0", - validate=True, - ) - assert os.path.exists("local.txt") - - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.copy_file( - "/remote.txt", - "/copy.txt", - validate=True, - ) - assert not interface.exists("/copy.txt", "1.0.0") - interface.copy_file( - "/remote.txt", - "/copy.txt", - version="1.0.0", - validate=True, - ) - assert interface.exists("/copy.txt", "1.0.0") - - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.move_file( - "/remote.txt", - "/move.txt", - version="1.0.0", - validate=True, - ) - assert not interface.exists("/move.txt", "1.0.0") - assert interface.exists("/remote.txt", "1.0.0") - interface.move_file( - "/remote.txt", - 
"/move.txt", - version="1.0.0", - validate=True, - ) - assert interface.exists("/move.txt", "1.0.0") - assert not interface.exists("/remote.txt", "1.0.0") - - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.put_archive( - tmpdir, - "/remote.zip", - "1.0.0", - validate=True, - ) - assert not interface.exists("/remote.zip", "1.0.0") - interface.put_archive( - ".", - "/remote.zip", - "1.0.0", - validate=True, - ) - assert interface.exists("/remote.zip", "1.0.0") - - dst_root = os.path.join(tmpdir, "extract") - with pytest.raises(InterruptedError, match=error_msg): - interface_bad.get_archive( - "/remote.zip", - dst_root, - "1.0.0", - validate=True, - ) - assert not os.path.exists(dst_root) - interface.get_archive( - "/remote.zip", - dst_root, - "1.0.0", - validate=True, - ) - assert os.path.exists(dst_root) + backend.put_file(src_path, other_remote_file, "3.0.0") + assert backend.versions(dst_path) == ["1.0.0", "2.0.0"] + assert backend.latest_version(dst_path) == "2.0.0"