v7labs · JBWilkie · Nov 22, 2024 · Nov 19, 2024 · Nov 21, 2024 · Nov 21, 2024
diff --git a/e2e_tests/cli/test_full_cycle.py b/e2e_tests/cli/test_full_cycle.py
@@ -1,12 +1,32 @@
+import os
 import shutil
 from pathlib import Path
 
 from e2e_tests.helpers import assert_cli, run_cli_command, export_release
 from e2e_tests.objects import E2EDataset, ConfigValues
 from e2e_tests.cli.test_import import compare_annotations_export
+from e2e_tests.cli.test_push import extract_and_push
 
 
-def test_full_cycle(
+def copy_files_to_flat_directory(source_dir, target_dir):
+    """
+    Multi-source file items are pulled in a structure where every file is contained in a
+    folder named after the slot it's in. Multi-file push only considers files in the
+    top level of the passed directory, so to push pulled multi-file items as multi-file
+    items, we need to copy the .jpg files into a flat directory.
+    """
+    if not os.path.exists(target_dir):
+        os.makedirs(target_dir)
+    for root, dirs, files in os.walk(source_dir):
+        for file in files:
+            if file.endswith(".jpg"):
+                shutil.copy(
+                    os.path.join(root, file),
+                    os.path.join(target_dir, file),
+                )
+
+
+def test_full_cycle_images(
     local_dataset: E2EDataset,
     config_values: ConfigValues,
 ):
@@ -103,3 +123,293 @@ def test_full_cycle(
         item_type,
         unzip=False,
     )
+
+
+def test_full_cycle_video(
+    local_dataset: E2EDataset,
+    config_values: ConfigValues,
+):
+    """
+    This test performs the following steps:
+    - 1: Pushes a video to the dataset
+    - 2: Imports some annotations
+    - 3: Creates and pulls a release of the dataset
+    - 4: Deletes all items from the dataset
+    - 5: Pushes and imports the pulled files & annotations to the dataset
+    - 6: Deletes locally pulled copies of the dataset files
+    - 7: Creates and pulls a new release of the dataset
+    - 8: Asserts that the pulled data is as expected
+
+    It is designed to catch errors that may arise from changes to exported Darwin JSON
+    """
+    item_type = "single_slotted_video"
+    annotation_format = "darwin"
+    first_release_name = "first_release"
+    second_release_name = "second_release"
+    zipped_video_dir = "25_frame_video"
+    push_dir = Path(__file__).parents[1] / "data" / "push" / f"{zipped_video_dir}.zip"
+    pull_dir = Path(
+        f"{Path.home()}/.darwin/datasets/{config_values.team_slug}/{local_dataset.slug}"
+    )
+    annotations_import_dir = (
+        Path(__file__).parents[1] / "data" / "import" / "video_annotations_small_video"
+    )
+    expected_filepaths = [
+        f"{pull_dir}/images/small_video.mp4",
+    ]
+
+    # Push a video to the dataset
+    extract_and_push(push_dir, local_dataset, config_values, zipped_video_dir)
+
+    # Upload annotations to the dataset
+    result = run_cli_command(
+        f"darwin dataset import {local_dataset.name} {annotation_format} {annotations_import_dir}"
+    )
+    assert_cli(result, 0)
+
+    # Pull a first release of the dataset
+    original_release = export_release(
+        annotation_format, local_dataset, config_values, release_name=first_release_name
+    )
+    result = run_cli_command(
+        f"darwin dataset pull {local_dataset.name}:{original_release.name}"
+    )
+    assert_cli(result, 0)
+
+    # Delete all items in the dataset
+    local_dataset.delete_items(config_values)
+
+    # Push and import the pulled files and annotations to the dataset
+    result = run_cli_command(
+        f"darwin dataset push {local_dataset.name} {pull_dir}/images --preserve-folders"
+    )
+    assert_cli(result, 0)
+    result = run_cli_command(
+        f"darwin dataset import {local_dataset.name} {annotation_format} {pull_dir}/releases/{first_release_name}/annotations"
+    )
+    assert_cli(result, 0)
+
+    # Delete the locally pulled copies of the dataset files
+    shutil.rmtree(f"{pull_dir}/images")
+
+    # Pull a second release of the dataset
+    new_release = export_release(
+        annotation_format,
+        local_dataset,
+        config_values,
+        release_name=second_release_name,
+    )
+    result = run_cli_command(
+        f"darwin dataset pull {local_dataset.name}:{new_release.name}"
+    )
+    assert_cli(result, 0)
+
+    # Check that all expected files have been downloaded
+    all_filepaths = list(pull_dir.rglob("*"))
+    for expected_file in expected_filepaths:
+        assert Path(expected_file) in all_filepaths
+
+    # Check that all downloaded annotations are as expected
+    compare_annotations_export(
+        Path(f"{pull_dir}/releases/{first_release_name}/annotations"),
+        Path(f"{pull_dir}/releases/{second_release_name}/annotations"),
+        item_type,
+        unzip=False,
+    )
+
+
+def test_full_cycle_multi_slotted_item(
+    local_dataset: E2EDataset,
+    config_values: ConfigValues,
+):
+    """
+    This test performs the following steps:
+    - 1: Registers a multi-slotted item from external storage to a dataset
+    - 2: Imports some annotations
+    - 3: Creates and pulls a release of the dataset
+    - 4: Deletes all items from the dataset
+    - 5: Pushes and imports the pulled files & annotations to the dataset
+    - 6: Deletes locally pulled copies of the dataset files
+    - 7: Creates and pulls a new release of the dataset
+    - 8: Asserts that the pulled data is as expected
+
+    It is designed to catch errors that may arise from changes to exported Darwin JSON
+    """
+    item_type = "multi_slotted"
+    annotation_format = "darwin"
+    first_release_name = "first_release"
+    second_release_name = "second_release"
+    pull_dir = Path(
+        f"{Path.home()}/.darwin/datasets/{config_values.team_slug}/{local_dataset.slug}"
+    )
+    annotations_import_dir = (
+        Path(__file__).parents[1]
+        / "data"
+        / "import"
+        / "multi_slotted_annotations_with_slots_defined"
+    )
+    expected_filepaths = [
+        f"{pull_dir}/images/multi_slotted_item/0/image_1.jpg",
+        f"{pull_dir}/images/multi_slotted_item/1/image_2.jpg",
+        f"{pull_dir}/images/multi_slotted_item/2/image_3.jpg",
+    ]
+
+    # Populate the dataset with items and annotations
+    local_dataset.register_read_only_items(config_values, item_type)
+    result = run_cli_command(
+        f"darwin dataset import {local_dataset.name} {annotation_format} {annotations_import_dir}"
+    )
+    assert_cli(result, 0)
+
+    # Pull a first release of the dataset
+    original_release = export_release(
+        annotation_format, local_dataset, config_values, release_name=first_release_name
+    )
+    result = run_cli_command(
+        f"darwin dataset pull {local_dataset.name}:{original_release.name}"
+    )
+    assert_cli(result, 0)
+
+    # Delete all items in the dataset
+    local_dataset.delete_items(config_values)
+
+    # Create a temporary directory for pushing files
+    tmp_push_dir = f"{pull_dir}/multi_slotted_item"
+    copy_files_to_flat_directory(f"{pull_dir}/images", tmp_push_dir)
+
+    # Push and import the pulled files and annotations to the dataset
+    result = run_cli_command(
+        f"darwin dataset push {local_dataset.name} {tmp_push_dir} --item-merge-mode slots"
+    )
+    assert_cli(result, 0)
+    result = run_cli_command(
+        f"darwin dataset import {local_dataset.name} {annotation_format} {pull_dir}/releases/{first_release_name}/annotations"
+    )
+    assert_cli(result, 0)
+
+    # Delete the locally pulled copies of the dataset files
+    shutil.rmtree(f"{pull_dir}/images")
+
+    # Pull a second release of the dataset
+    new_release = export_release(
+        annotation_format,
+        local_dataset,
+        config_values,
+        release_name=second_release_name,
+    )
+    result = run_cli_command(
+        f"darwin dataset pull {local_dataset.name}:{new_release.name}"
+    )
+    assert_cli(result, 0)
+
+    # Check that all expected files have been downloaded
+    all_filepaths = list(pull_dir.rglob("*"))
+    for expected_file in expected_filepaths:
+        assert Path(expected_file) in all_filepaths
+
+    # Check that all downloaded annotations are as expected
+    compare_annotations_export(
+        Path(f"{pull_dir}/releases/{first_release_name}/annotations"),
+        Path(f"{pull_dir}/releases/{second_release_name}/annotations"),
+        item_type,
+        unzip=False,
+    )
+
+
+def test_full_cycle_multi_channel_item(
+    local_dataset: E2EDataset,
+    config_values: ConfigValues,
+):
+    """
+    This test performs the following steps:
+    - 1: Registers a multi-channel item from external storage to the dataset
+    - 2: Imports some annotations
+    - 3: Creates and pulls a release of the dataset
+    - 4: Deletes all items from the dataset
+    - 5: Pushes and imports the pulled files & annotations to the dataset
+    - 6: Deletes locally pulled copies of the dataset files
+    - 7: Creates and pulls a new release of the dataset
+    - 8: Asserts that the pulled data is as expected
+
+    It is designed to catch errors that may arise from changes to exported Darwin JSON
+    """
+    item_type = "multi_channel"
+    annotation_format = "darwin"
+    first_release_name = "first_release"
+    second_release_name = "second_release"
+    pull_dir = Path(
+        f"{Path.home()}/.darwin/datasets/{config_values.team_slug}/{local_dataset.slug}"
+    )
+    annotations_import_dir = (
+        Path(__file__).parents[1]
+        / "data"
+        / "import"
+        / "multi_channel_annotations_with_slots_defined"
+    )
+    expected_filepaths = [
+        f"{pull_dir}/images/multi_channel_item/image_1.jpg/image_1.jpg",
+        f"{pull_dir}/images/multi_channel_item/image_2.jpg/image_2.jpg",
+        f"{pull_dir}/images/multi_channel_item/image_3.jpg/image_3.jpg",
+    ]
+
+    # Populate the dataset with items and annotations
+    local_dataset.register_read_only_items(config_values, item_type)
+    result = run_cli_command(
+        f"darwin dataset import {local_dataset.name} {annotation_format} {annotations_import_dir}"
+    )
+    assert_cli(result, 0)
+
+    # Pull a first release of the dataset
+    original_release = export_release(
+        annotation_format, local_dataset, config_values, release_name=first_release_name
+    )
+    result = run_cli_command(
+        f"darwin dataset pull {local_dataset.name}:{original_release.name}"
+    )
+    assert_cli(result, 0)
+
+    # Delete all items in the dataset
+    local_dataset.delete_items(config_values)
+
+    # Create a temporary directory for pushing files
+    tmp_push_dir = f"{pull_dir}/multi_channel_item"
+    copy_files_to_flat_directory(f"{pull_dir}/images", tmp_push_dir)
+
+    # Push and import the pulled files and annotations to the dataset
+    result = run_cli_command(
+        f"darwin dataset push {local_dataset.name} {tmp_push_dir} --item-merge-mode channels"
+    )
+    assert_cli(result, 0)
+    result = run_cli_command(
+        f"darwin dataset import {local_dataset.name} {annotation_format} {pull_dir}/releases/{first_release_name}/annotations"
+    )
+    assert_cli(result, 0)
+
+    # Delete the locally pulled copies of the dataset files
+    shutil.rmtree(f"{pull_dir}/images")
+
+    # Pull a second release of the dataset
+    new_release = export_release(
+        annotation_format,
+        local_dataset,
+        config_values,
+        release_name=second_release_name,
+    )
+    result = run_cli_command(
+        f"darwin dataset pull {local_dataset.name}:{new_release.name}"
+    )
+    assert_cli(result, 0)
+
+    # Check that all expected files have been downloaded
+    all_filepaths = list(pull_dir.rglob("*"))
+    for expected_file in expected_filepaths:
+        assert Path(expected_file) in all_filepaths
+
+    # Check that all downloaded annotations are as expected
+    compare_annotations_export(
+        Path(f"{pull_dir}/releases/{first_release_name}/annotations"),
+        Path(f"{pull_dir}/releases/{second_release_name}/annotations"),
+        item_type,
+        base_slot="image_1.jpg",
+        unzip=False,
+    )
diff --git a/e2e_tests/cli/test_push.py b/e2e_tests/cli/test_push.py
@@ -7,7 +7,7 @@
     wait_until_items_processed,
     list_items,
 )
-from e2e_tests.objects import E2EDataset, ConfigValues
+from e2e_tests.objects import E2EDataset, E2EItem, ConfigValues
 
 import tempfile
 import zipfile
@@ -25,12 +25,24 @@ def extract_and_push(
             )
             assert_cli(result, 0)
             wait_until_items_processed(config_values, local_dataset.id)
-            return list_items(
+            items = list_items(
                 config_values.api_key,
                 local_dataset.id,
                 config_values.team_slug,
                 config_values.server,
             )
+            for item in items:
+                local_dataset.add_item(
+                    E2EItem(
+                        name=item["name"],
+                        id=item["id"],
+                        path=item["path"],
+                        file_name=item["name"],
+                        slot_name=item["slots"][0]["file_name"],
+                        annotations=[],
+                    )
+                )
+            return items
 
 
 def test_push_mixed_filetypes(