Skip to content

Commit

Permalink
Initial commit (#944)
Browse files Browse the repository at this point in the history
  • Loading branch information
JBWilkie authored Oct 23, 2024
1 parent e28bc1c commit 27c2fde
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 7 deletions.
15 changes: 11 additions & 4 deletions darwin/dataset/upload_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,18 +452,20 @@ def skip_existing_full_remote_filepaths(self) -> None:
local_files_to_remove.extend(multi_file_item.files)
multi_file_items_to_remove.append(multi_file_item)
console.print(
f"The remote filepath {multi_file_item.full_path} is already occupied by a dataset item in the {self.dataset.slug} dataset. Skipping upload of item.",
f"The remote filepath {multi_file_item.full_path} is already occupied by a dataset item in the `{self.dataset.slug}` dataset. Skipping upload of item.",
style="warning",
)
if self.local_files:
for local_file in self.local_files:
if Path(local_file.full_path) in full_remote_filepaths:
if (
Path(local_file.full_path) in full_remote_filepaths
and local_file not in local_files_to_remove
):
local_files_to_remove.append(local_file)
console.print(
f"The remote filepath {local_file.full_path} already exists in the {self.dataset.slug} dataset. Skipping upload of item.",
f"The remote filepath {local_file.full_path} already exists in the `{self.dataset.slug}` dataset. Skipping upload of item.",
style="warning",
)

self.local_files = [
local_file
for local_file in self.local_files
Expand All @@ -476,6 +478,11 @@ def skip_existing_full_remote_filepaths(self) -> None:
if multi_file_item not in multi_file_items_to_remove
]

if not self.local_files and not self.multi_file_items:
raise ValueError(
"All items to be uploaded have paths that already exist in the remote dataset. No items to upload."
)

def prepare_upload(
self,
) -> Optional[Iterator[Callable[[Optional[ByteReadCallback]], None]]]:
Expand Down
31 changes: 28 additions & 3 deletions tests/darwin/dataset/upload_manager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ def test_request_upload_is_not_called_on_init(
dataset: RemoteDataset, request_upload_endpoint: str
):
with patch.object(dataset, "fetch_remote_files", return_value=[]):
upload_handler = UploadHandler.build(dataset, [])
with patch.object(
UploadHandler, "skip_existing_full_remote_filepaths", return_value=[]
):
upload_handler = UploadHandler.build(dataset, [])

assert upload_handler.pending_count == 0
assert upload_handler.blocked_count == 0
Expand Down Expand Up @@ -446,7 +449,7 @@ def test_skip_existing_full_remote_filepaths_with_local_files():
assert local_file_2 in upload_handler.local_files

mock_print.assert_any_call(
"The remote filepath /existing_file_1.jpg already exists in the test-dataset dataset. Skipping upload of item.",
"The remote filepath /existing_file_1.jpg already exists in the `test-dataset` dataset. Skipping upload of item.",
style="warning",
)

Expand Down Expand Up @@ -475,6 +478,28 @@ def test_skip_existing_full_remote_filepaths_with_multi_file_items():

# Verify that the correct warning was printed
mock_print.assert_any_call(
"The remote filepath /existing_multi_file_item.jpg is already occupied by a dataset item in the test-dataset dataset. Skipping upload of item.",
"The remote filepath /existing_multi_file_item.jpg is already occupied by a dataset item in the `test-dataset` dataset. Skipping upload of item.",
style="warning",
)


def test_skip_existing_full_remote_filepaths_raises_if_no_files_left():
mock_dataset = MagicMock()
mock_dataset.fetch_remote_files.return_value = [
MagicMock(full_path="/existing_multi_file_item_1.jpg"),
MagicMock(full_path="/existing_multi_file_item_2.jpg"),
]
mock_dataset.slug = "test-dataset"

multi_file_item_1 = MagicMock(
full_path="/existing_multi_file_item_1.jpg", files=[MagicMock()]
)
multi_file_item_2 = MagicMock(
full_path="/existing_multi_file_item_2.jpg", files=[MagicMock()]
)

with pytest.raises(
ValueError,
match="All items to be uploaded have paths that already exist in the remote dataset. No items to upload.",
):
UploadHandlerV2(mock_dataset, [], [multi_file_item_1, multi_file_item_2])

0 comments on commit 27c2fde

Please sign in to comment.