From bdf6abfafa822746eb0940844ecd5369cf79e401 Mon Sep 17 00:00:00 2001 From: Tom Close Date: Tue, 26 Mar 2024 11:39:40 +1100 Subject: [PATCH] implement bytes_repr in pydra instead of in fileformats --- pydra/utils/hash.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py index 7ecd977857..d1907c5aa2 100644 --- a/pydra/utils/hash.py +++ b/pydra/utils/hash.py @@ -53,7 +53,7 @@ ) Hash = NewType("Hash", bytes) -CacheKey = NewType("CacheKey", ty.Tuple[ty.Hashable, ty.Hashable]) +CacheKey = NewType("CacheKey", ty.Tuple[ty.Hashable, ...]) def location_converter(path: ty.Union[Path, str, None]) -> Path: @@ -472,18 +472,29 @@ def bytes_repr_fileset( fileset: FileSet, cache: Cache ) -> Iterator[ty.Union[CacheKey, bytes]]: fspaths = sorted(fileset.fspaths) + # Yield the cache key for the fileset, which is a tuple of the file-system paths + # and their mtimes. It is used to store a persistent cache of the fileset hashes + # to avoid recomputation between calls yield CacheKey( tuple(repr(p) for p in fspaths) # type: ignore[arg-type] + tuple(p.lstat().st_mtime_ns for p in fspaths) ) - yield from fileset.__bytes_repr__(cache) + cls = type(fileset) + yield f"{cls.__module__}.{cls.__name__}:".encode() + for key, chunk_iter in fileset.byte_chunks(): + yield (",'" + key + "'=").encode() + yield from chunk_iter +# Need to disable the mtime cache key for mocked filesets, which are used in doctests @register_serializer(MockMixin) def bytes_repr_mock_fileset( mock_fileset: MockMixin, cache: Cache ) -> Iterator[ty.Union[CacheKey, bytes]]: - yield from mock_fileset.__bytes_repr__(cache) + cls = type(mock_fileset) + yield f"{cls.__module__}.{cls.__name__}:".encode() + for key, _ in mock_fileset.byte_chunks(): + yield (",'" + key + "'").encode() @register_serializer(list)