From 88e4e0b0e5809c8f92c7cf23d094ed15a2a9b8cd Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Wed, 22 May 2024 12:00:01 +1000
Subject: [PATCH 1/2] adds bytes repr implementation for mock fileset types to get pydra task doctests to work

---
Review notes (commentary only; not part of the commit message or the diff):
 * `FileSet` is now imported from `fileformats.core.fileset` (previously
   `fileformats.core`), together with the newly used `MockMixin`.
 * Registers `bytes_repr_mock_fileset` as the serializer for `MockMixin`;
   in this patch it simply delegates to the object's own
   `__bytes_repr__(cache)`.

 pydra/utils/hash.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py
index 1946b4b364..4184ef55ba 100644
--- a/pydra/utils/hash.py
+++ b/pydra/utils/hash.py
@@ -20,7 +20,7 @@
 )
 from filelock import SoftFileLock
 import attrs.exceptions
-from fileformats.core import FileSet
+from fileformats.core.fileset import FileSet, MockMixin
 from . import user_cache_dir, add_exc_note
 
 logger = logging.getLogger("pydra")
@@ -485,6 +485,13 @@ def bytes_repr_fileset(
     yield from fileset.__bytes_repr__(cache)
 
 
+@register_serializer(MockMixin)
+def bytes_repr_mock_fileset(
+    mock_fileset: MockMixin, cache: Cache
+) -> Iterator[ty.Union[CacheKey, bytes]]:
+    yield from mock_fileset.__bytes_repr__(cache)
+
+
 @register_serializer(list)
 @register_serializer(tuple)
 def bytes_repr_seq(obj: Sequence, cache: Cache) -> Iterator[bytes]:

From dd6fa8a48172347ff60856712b5bd972a9ad06c6 Mon Sep 17 00:00:00 2001
From: Tom Close
Date: Wed, 22 May 2024 12:00:01 +1000
Subject: [PATCH 2/2] implement bytes_repr in pydra instead of in fileformats

---
Review notes (commentary only; not part of the commit message or the diff):
 * `CacheKey` is widened from a 2-tuple of hashables to a variable-length
   tuple. The key yielded by `bytes_repr_fileset` concatenates one `repr`
   per path with one `st_mtime_ns` per path, so its length is twice the
   number of paths.
 * `bytes_repr_fileset` no longer calls `fileset.__bytes_repr__(cache)`.
   After the cache key it yields `<module>.<class>:` and then, for each
   `(key, chunk_iter)` from `fileset.byte_chunks()`, the bytes of
   `,'<key>'=` followed by the chunks.
 * `bytes_repr_mock_fileset` yields the same class prefix but only
   `,'<key>'` per entry (no `=`, chunk contents discarded) and never yields
   a `CacheKey`. The `cache` argument is unused and the `CacheKey` member of
   the return annotation is not exercised by this implementation.

 pydra/utils/hash.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py
index 4184ef55ba..3ba3e97b44 100644
--- a/pydra/utils/hash.py
+++ b/pydra/utils/hash.py
@@ -55,7 +55,7 @@
 )
 
 Hash = NewType("Hash", bytes)
-CacheKey = NewType("CacheKey", ty.Tuple[ty.Hashable, ty.Hashable])
+CacheKey = NewType("CacheKey", ty.Tuple[ty.Hashable, ...])
 
 
 def location_converter(path: ty.Union[Path, str, None]) -> Path:
@@ -478,18 +478,29 @@ def bytes_repr_fileset(
     fileset: FileSet, cache: Cache
 ) -> Iterator[ty.Union[CacheKey, bytes]]:
     fspaths = sorted(fileset.fspaths)
+    # Yield the cache key for the fileset, which is a tuple of the file-system paths
+    # and their mtime. Is used to store persistent cache of the fileset hashes
+    # to avoid recomputation between calls
     yield CacheKey(
         tuple(repr(p) for p in fspaths)  # type: ignore[arg-type]
         + tuple(p.lstat().st_mtime_ns for p in fspaths)
     )
-    yield from fileset.__bytes_repr__(cache)
+    cls = type(fileset)
+    yield f"{cls.__module__}.{cls.__name__}:".encode()
+    for key, chunk_iter in fileset.byte_chunks():
+        yield (",'" + key + "'=").encode()
+        yield from chunk_iter
 
 
+# Need to disable the mtime cache key for mocked filesets. Used in doctests
 @register_serializer(MockMixin)
 def bytes_repr_mock_fileset(
     mock_fileset: MockMixin, cache: Cache
 ) -> Iterator[ty.Union[CacheKey, bytes]]:
-    yield from mock_fileset.__bytes_repr__(cache)
+    cls = type(mock_fileset)
+    yield f"{cls.__module__}.{cls.__name__}:".encode()
+    for key, _ in mock_fileset.byte_chunks():
+        yield (",'" + key + "'").encode()
 
 
 @register_serializer(list)