fix sort discrepancy between Path and str #8

Merged 1 commit on Jul 18, 2024
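Background (not stated in the PR text itself): on the CPython versions this code targets, pathlib orders paths component by component rather than by the raw string, so sorting Path objects can disagree with sorting the same values as plain strings. A minimal sketch of the discrepancy, using the same "a.txt" / "a/b.txt" names the new test below uses:

from pathlib import PurePosixPath

names = ["a.txt", "a/b.txt"]
print(sorted(names))                              # ['a.txt', 'a/b.txt']  ('.' sorts before '/')
print(sorted(PurePosixPath(n) for n in names))    # [PurePosixPath('a/b.txt'), PurePosixPath('a.txt')]
# Local files sorted as Path objects therefore pair up against S3 keys sorted as
# strings in a different order; this PR sorts the raw strings first and converts
# to Path afterwards so both sides line up.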
24 changes: 18 additions & 6 deletions src/aibs_informatics_aws_utils/s3.py
@@ -275,7 +275,7 @@ def download_s3_object(
if force or should_sync(
source_path=s3_path, destination_path=local_path, size_only=size_only, **kwargs
):
- if local_path.exists() and not exist_ok:
+ if local_path.exists() and not exist_ok and not force:
raise ValueError(f"Unable to download S3 object to {local_path}. Path exists.")
elif local_path.exists() and local_path.is_dir() and exist_ok:
logger.warning(
@@ -1109,36 +1109,48 @@ def check_paths_in_sync(
"""
source_paths: Union[List[Path], List[S3URI]] = (
(
- sorted(map(Path, find_paths(source_path, include_dirs=False)))
+ [Path(p) for p in sorted(find_paths(source_path, include_dirs=False))]
if source_path.is_dir()
else [source_path]
)
if isinstance(source_path, Path)
else (
- list_s3_paths(source_path, **kwargs) if not is_object(source_path) else [source_path]
+ sorted(list_s3_paths(source_path, **kwargs))  # type: ignore[arg-type]
+ if not is_object(source_path)
+ else [source_path]  # type: ignore
)
)
destination_paths: Union[List[Path], List[S3URI]] = (
(
- sorted(map(Path, find_paths(destination_path, include_dirs=False)))
+ list(map(Path, sorted(find_paths(destination_path, include_dirs=False))))
if destination_path.is_dir()
else [destination_path]
)
if isinstance(destination_path, Path)
else (
- list_s3_paths(destination_path, **kwargs)
+ sorted(list_s3_paths(destination_path, **kwargs))  # type: ignore[arg-type]
if not is_object(destination_path)
- else [destination_path]
+ else [destination_path]  # type: ignore
)
)
if len(source_paths) == 0:
raise ValueError(f"Source path {source_path} does not exist")
if len(source_paths) != len(destination_paths):
logger.info(
"Source and destination paths have different number of paths. "
f"Source path {source_path} has {len(source_paths)} paths, "
f"destination path {destination_path} has {len(destination_paths)} paths"
)
return False
for sp, dp in zip(source_paths, destination_paths):
if str(sp).removeprefix(str(source_path)) != str(dp).removeprefix(str(destination_path)):
logger.info(
f"Source path {sp} (relative={str(sp).removeprefix(str(source_path))}) does not match "
f"destination path {dp} (relative={str(dp).removeprefix(str(destination_path))})"
)
return False
if should_sync(source_path=sp, destination_path=dp, size_only=size_only, **kwargs):
logger.info(f"Source path {sp} content does not match destination path {dp}")
return False
return True
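The per-pair check above compares relative suffixes after stripping the common root, not the absolute paths. A rough illustration with hypothetical values (the real S3URI type is assumed here to render as a plain "s3://bucket/key" string):

from pathlib import Path

sp = Path("/tmp/source/a/b.txt")
dp = "s3://example-bucket/destination/a/b.txt"   # stand-in for an S3URI

print(str(sp).removeprefix("/tmp/source"))                    # '/a/b.txt'
print(dp.removeprefix("s3://example-bucket/destination"))     # '/a/b.txt', suffixes match
# If the two listings were sorted differently, some zipped pair would have mismatched
# suffixes, the mismatch would be logged, and check_paths_in_sync would return False.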

29 changes: 27 additions & 2 deletions test/aibs_informatics_aws_utils/test_s3.py
@@ -400,8 +400,8 @@ def test__upload_path__download_s3_path__handles_file(self):
upload_path(previous_file, s3_path)
self.assertTrue(is_object(s3_path))
with self.assertRaises(Exception):
- download_s3_path(s3_path, existing_file, exist_ok=False)
- download_s3_path(s3_path, existing_file, exist_ok=True)
+ download_s3_path(s3_path, existing_file, force=False, exist_ok=False)
+ download_s3_path(s3_path, existing_file, force=False, exist_ok=True)
download_s3_path(s3_path, non_existing_file, exist_ok=False)
self.assertEqual(existing_file.read_text(), previous_file.read_text())
self.assertEqual(non_existing_file.read_text(), previous_file.read_text())
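For context on the condition changed in download_s3_object above: force now short-circuits the existence check. A hedged sketch of the call-level behavior this test exercises, assuming download_s3_path forwards force and exist_ok to download_s3_object:

# existing_file already exists locally with different content
download_s3_path(s3_path, existing_file, force=False, exist_ok=False)  # raises: path exists and overwrite not allowed
download_s3_path(s3_path, existing_file, force=False, exist_ok=True)   # succeeds: overwrite explicitly permitted
download_s3_path(s3_path, existing_file, force=True, exist_ok=False)   # assumed to succeed: force bypasses the check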
@@ -1063,6 +1063,31 @@ def test__check_paths_in_sync__simple__folders_different(self):
assert check_paths_in_sync(source_s3_path, destination_path2) is False
assert check_paths_in_sync(source_s3_path, destination_s3_path2) is False

def test__check_paths_in_sync__handles_sorting_issues__folders_same(self):
source_path = self.tmp_path()
(source_path / "a.txt").write_text("hello")
(source_path / "a").mkdir()
(source_path / "a" / "b.txt").write_text("again")

source_s3_path = self.get_s3_path("source")
self.put_object(key="source/a.txt", content="hello")
self.put_object(key="source/a/b.txt", content="again")

destination_path = self.tmp_path()
(destination_path / "a.txt").write_text("hello")
(destination_path / "a").mkdir()
(destination_path / "a" / "b.txt").write_text("again")

destination_s3_path = self.get_s3_path("destination")
self.put_object(key="destination/a.txt", content="hello")
self.put_object(key="destination/a/b.txt", content="again")

# Should succeed
assert check_paths_in_sync(source_path, destination_path) is True
assert check_paths_in_sync(source_path, destination_s3_path) is True
assert check_paths_in_sync(source_s3_path, destination_path) is True
assert check_paths_in_sync(source_s3_path, destination_s3_path) is True


@fixture(scope="function")
def s3_bucket_fixture(aws_credentials_fixture, request):