Skip to content

Commit

Permalink
[DAR-4038][External] E2E annotation import tests for all supported fo…
Browse files Browse the repository at this point in the history
…rmats (#947)

* E2E test for importing annotations to videos

* Initial commit

* Test for importing non-Darwin JSON formats
  • Loading branch information
JBWilkie authored Oct 24, 2024
1 parent 27c2fde commit 3ead5c9
Show file tree
Hide file tree
Showing 14 changed files with 803 additions and 59 deletions.
230 changes: 187 additions & 43 deletions e2e_tests/cli/test_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,44 @@
list_items,
)
from e2e_tests.objects import E2EDataset, ConfigValues
from darwin.utils.utils import parse_darwin_json
import tempfile
import zipfile
import darwin.datatypes as dt
from typing import List, Dict, Optional, Union
import importlib
from typing import List, Dict, Optional, Union, Tuple, Any, Sequence


def compare_local_annotations_with_uploaded_annotations(
    annotation_format: str,
    local_dataset: E2EDataset,
    config_values: ConfigValues,
) -> None:
    """
    Checks that every annotation uploaded to every item of the given `local_dataset` is
    of the expected type given the annotation format.

    This is necessary to verify that imports of formats that cannot be exported are
    successful.

    Parameters
    ----------
    annotation_format : str
        The format the annotations were imported in. Must be one of the keys of
        `expected_annotation_types` below.
    local_dataset : E2EDataset
        The dataset whose items are checked. Its `get_annotation_data` result is
        indexed by item name to obtain each item's annotations.
    config_values : ConfigValues
        Passed through unchanged to `local_dataset.get_annotation_data`.

    Raises
    ------
    KeyError
        If `annotation_format` has no expected annotation type registered, or if an
        item name is missing from the fetched annotation data.
    AssertionError
        If any annotation (or, for video formats, any frame of any annotation) does
        not contain the expected annotation type.
    """
    expected_annotation_types = {
        "csv_tags": "tag",
        "csv_tags_video": "tag",
    }
    video_formats = ["csv_tags_video"]
    expected_annotation_type = expected_annotation_types[annotation_format]
    # The format does not change while iterating, so decide once up front.
    is_video_format = annotation_format in video_formats
    all_item_annotations, _, _ = local_dataset.get_annotation_data(config_values)
    for item in local_dataset.items:
        for item_annotation in all_item_annotations[item.name]:
            annotation_data = item_annotation["data"]
            if is_video_format:
                # Video annotations keep their payload per frame under "frames".
                for frame_index, frame_data in annotation_data["frames"].items():
                    assert expected_annotation_type in frame_data, (
                        f"Expected '{expected_annotation_type}' in frame "
                        f"{frame_index!r} of an annotation on item '{item.name}'"
                    )
            else:
                assert expected_annotation_type in annotation_data, (
                    f"Expected '{expected_annotation_type}' in an annotation "
                    f"on item '{item.name}'"
                )


def get_actual_annotation_filename(
Expand All @@ -36,8 +69,8 @@ def get_actual_annotation_filename(


def find_matching_actual_annotation(
expected_annotation: dt.Annotation,
actual_annotations: List[Union[dt.Annotation, dt.VideoAnnotation]],
expected_annotation: Union[dt.Annotation, dt.VideoAnnotation],
actual_annotations: Sequence[Union[dt.Annotation, dt.VideoAnnotation]],
) -> Union[dt.Annotation, dt.VideoAnnotation]:
"""
For a given expected annotation, finds the corresponding actual annotation
Expand Down Expand Up @@ -82,14 +115,19 @@ def assert_same_annotation_data(
For `dt.VideoAnnotation` objects:
Ensures that `expected_annotation.frames` is equivalent to `actual_annotation.frames`
"""
if isinstance(expected_annotation, dt.Annotation):
if isinstance(expected_annotation, dt.Annotation) and isinstance(
actual_annotation, dt.Annotation
):
assert expected_annotation.data == actual_annotation.data
elif isinstance(expected_annotation, dt.VideoAnnotation):
elif isinstance(expected_annotation, dt.VideoAnnotation) and isinstance(
actual_annotation, dt.VideoAnnotation
):
assert expected_annotation.frames == actual_annotation.frames


def assert_same_annotation_properties(
expected_annotation: dt.Annotation, actual_annotation: dt.Annotation
expected_annotation: Union[dt.Annotation, dt.VideoAnnotation],
actual_annotation: Union[dt.Annotation, dt.VideoAnnotation],
) -> None:
"""
Ensures that `expected_annotation.properties` is equivalent to `actual_annotation.properties`
Expand All @@ -99,7 +137,7 @@ def assert_same_annotation_properties(
actual_properties = actual_annotation.properties
assert actual_properties is not None
for expected_property in expected_properties:
assert expected_property in actual_properties # type : ignore
assert expected_property in actual_properties


def get_base_slot_name_of_item(
Expand All @@ -120,9 +158,37 @@ def get_base_slot_name_of_item(
return item["slots"][0]["slot_name"]


def parse_expected_and_actual_annotations(
    expected_annotation_files,
    actual_annotation_files,
    expected_filename: str = "",
    actual_filename: str = "",
    annotation_format: str = "",
) -> Tuple[List[dt.AnnotationFile], List[dt.AnnotationFile]]:
    """
    Parses and returns expected & actual annotation files in a given format.

    The importer for `annotation_format` is resolved dynamically from
    `darwin.importer.formats.<annotation_format>` and its `parse_path` function is
    applied first to the expected file and then to the actual file.

    Parameters
    ----------
    expected_annotation_files
        Lookup indexed by `expected_filename`; the value is wrapped in `Path`.
    actual_annotation_files
        Lookup indexed by `actual_filename`; the value is wrapped in `Path`.
    expected_filename : str
        Key of the expected annotation file to parse.
    actual_filename : str
        Key of the actual annotation file to parse.
    annotation_format : str
        Name of the importer module under `darwin.importer.formats`.

    Returns
    -------
    Tuple[List[dt.AnnotationFile], List[dt.AnnotationFile]]
        The parsed expected and actual data, each always as a list: a non-list
        parse result is wrapped in a single-element list.
    """
    importer_module = importlib.import_module(
        f"darwin.importer.formats.{annotation_format}"
    )

    def _parse_as_list(annotation_path: Path) -> List[dt.AnnotationFile]:
        # Normalise the importer's result so callers can always iterate over it.
        # NOTE(review): a `None` result would be wrapped as `[None]` - confirm
        # whether importers can return `None` for these fixtures.
        parsed = importer_module.parse_path(annotation_path)
        return parsed if isinstance(parsed, list) else [parsed]

    expected_annotation_data = _parse_as_list(
        Path(expected_annotation_files[expected_filename])
    )
    actual_annotation_data = _parse_as_list(
        Path(actual_annotation_files[actual_filename])
    )
    return expected_annotation_data, actual_annotation_data


def assert_same_annotation_slot_name(
expected_annotation: dt.Annotation,
actual_annotation: dt.Annotation,
expected_annotation: Union[dt.Annotation, dt.VideoAnnotation],
actual_annotation: Union[dt.Annotation, dt.VideoAnnotation],
item_type: str,
base_slot: Optional[str],
) -> None:
Expand All @@ -144,8 +210,8 @@ def assert_same_annotation_slot_name(


def assert_same_item_level_properties(
expected_item_level_properties: List[Dict[str, str]],
actual_item_level_properties: List[Dict[str, str]],
expected_item_level_properties: List[Dict[str, Any]],
actual_item_level_properties: List[Dict[str, Any]],
) -> None:
"""
Ensures that all expected item-level properties are present in exported item-level properties
Expand All @@ -159,6 +225,7 @@ def compare_annotations_export(
expected_annotations_dir: Path,
item_type: str,
base_slot: Optional[str] = "0",
annotation_format: str = "darwin",
):
"""
Compares a set of downloaded annotation files with the imported files that resulted
Expand All @@ -184,44 +251,51 @@ def compare_annotations_export(
actual_filename = get_actual_annotation_filename(
expected_filename, actual_annotation_files
)
expected_annotation_data: List[dt.Annotation] = parse_darwin_json(
Path(expected_annotation_files[expected_filename]) # type: ignore
)
expected_annotations = expected_annotation_data.annotations # type: ignore
expected_item_level_properties = (
expected_annotation_data.item_properties # type: ignore
)

actual_annotation_data: List[dt.Annotation] = parse_darwin_json(
Path(actual_annotation_files[actual_filename]) # type: ignore
)
actual_annotations = actual_annotation_data.annotations # type: ignore
actual_item_level_properties = (
actual_annotation_data.item_properties # type: ignore
expected_annotation_data, actual_annotation_data = (
parse_expected_and_actual_annotations(
expected_annotation_files,
actual_annotation_files,
expected_filename,
actual_filename,
annotation_format,
)
)
for idx, expected_annotation_file in enumerate(expected_annotation_data):
actual_annotation_file = actual_annotation_data[idx]
expected_annotations = expected_annotation_file.annotations
actual_annotations = actual_annotation_file.annotations
expected_item_level_properties = (
expected_annotation_file.item_properties or []
)
actual_item_level_properties = actual_annotation_file.item_properties or []

delete_annotation_uuids(expected_annotations)
delete_annotation_uuids(actual_annotations)
delete_annotation_uuids(expected_annotations)
delete_annotation_uuids(actual_annotations)

assert_same_item_level_properties(
expected_item_level_properties, actual_item_level_properties
)
for expected_annotation in expected_annotations:
actual_annotation = find_matching_actual_annotation(
expected_annotation, actual_annotations
)
assert_same_annotation_data(expected_annotation, actual_annotation)
assert_same_annotation_properties(expected_annotation, actual_annotation)
assert_same_annotation_slot_name(
expected_annotation, actual_annotation, item_type, base_slot
assert_same_item_level_properties(
expected_item_level_properties, actual_item_level_properties
)
for expected_annotation in expected_annotations:
actual_annotation = find_matching_actual_annotation(
expected_annotation, actual_annotations
)
assert_same_annotation_data(expected_annotation, actual_annotation)
assert_same_annotation_properties(
expected_annotation, actual_annotation
)
assert_same_annotation_slot_name(
expected_annotation, actual_annotation, item_type, base_slot
)


def run_import_test(
local_dataset: E2EDataset,
config_values: ConfigValues,
item_type: str,
annotations_subdir: str,
annotation_format: Optional[str] = "darwin",
files_in_flat_structure: bool = False,
export_only: Optional[bool] = False,
item_name: Optional[str] = None,
additional_flags: str = "",
exit_code: int = 0,
Expand All @@ -231,12 +305,14 @@ def run_import_test(
"""
Helper function to run import tests for different item types and annotation configurations.
"""
local_dataset.register_read_only_items(config_values, item_type)
local_dataset.register_read_only_items(
config_values, item_type, files_in_flat_structure
)
expected_annotations_dir = (
Path(__file__).parents[1] / "data" / "import" / annotations_subdir
)
result = run_cli_command(
f"darwin dataset import {local_dataset.name} darwin {expected_annotations_dir} {additional_flags}"
f"darwin dataset import {local_dataset.name} {annotation_format} {expected_annotations_dir} {additional_flags}"
)
assert_cli(result, exit_code)

Expand All @@ -247,6 +323,12 @@ def run_import_test(
assert expect_error in result.stdout
return

if export_only:
compare_local_annotations_with_uploaded_annotations(
annotation_format, local_dataset, config_values # type: ignore
)
return

base_slot = (
get_base_slot_name_of_item(config_values, local_dataset.id, item_name)
if item_name
Expand All @@ -255,10 +337,17 @@ def run_import_test(
with tempfile.TemporaryDirectory() as tmp_dir_str:
actual_annotations_dir = Path(tmp_dir_str)
export_and_download_annotations(
actual_annotations_dir, local_dataset, config_values
actual_annotations_dir,
annotation_format, # type: ignore
local_dataset,
config_values,
)
compare_annotations_export(
actual_annotations_dir, expected_annotations_dir, item_type, base_slot
actual_annotations_dir,
expected_annotations_dir,
item_type,
base_slot,
annotation_format, # type: ignore
)


Expand Down Expand Up @@ -465,3 +554,58 @@ def test_import_annotations_with_subtypes_to_videos(
item_type="single_slotted_video",
annotations_subdir="video_annotations_with_subtypes",
)


def test_importing_coco_annotations(
    local_dataset: E2EDataset, config_values: ConfigValues
) -> None:
    """
    Runs the shared import test for the `coco` format against the
    `coco_annotations` fixtures, on `single_slotted` items registered with
    `files_in_flat_structure=True`.
    """
    run_import_test(
        local_dataset,
        config_values,
        item_type="single_slotted",
        annotations_subdir="coco_annotations",
        annotation_format="coco",
        files_in_flat_structure=True,
    )


def test_importing_csv_tags_annotations(
    local_dataset: E2EDataset, config_values: ConfigValues
) -> None:
    """
    Runs the shared import test for the `csv_tags` format against the
    `csv_tag_annotations` fixtures on `single_slotted` items.

    `export_only=True` makes `run_import_test` check the uploaded annotation
    types directly instead of exporting and comparing files.
    """
    run_import_test(
        local_dataset,
        config_values,
        item_type="single_slotted",
        annotations_subdir="csv_tag_annotations",
        annotation_format="csv_tags",
        export_only=True,
    )


def test_importing_csv_tags_video_annotations(
    local_dataset: E2EDataset, config_values: ConfigValues
) -> None:
    """
    Runs the shared import test for the `csv_tags_video` format against the
    `csv_tag_video_annotations` fixtures on `single_slotted_video` items.

    `export_only=True` makes `run_import_test` check the uploaded annotation
    types directly instead of exporting and comparing files.
    """
    run_import_test(
        local_dataset,
        config_values,
        item_type="single_slotted_video",
        annotations_subdir="csv_tag_video_annotations",
        annotation_format="csv_tags_video",
        export_only=True,
    )


def test_importing_pascal_voc_annotations(
    local_dataset: E2EDataset, config_values: ConfigValues
) -> None:
    """
    Runs the shared import test for the `pascal_voc` format against the
    `pascal_voc_annotations` fixtures on `single_slotted` items.
    """
    run_import_test(
        local_dataset,
        config_values,
        item_type="single_slotted",
        annotations_subdir="pascal_voc_annotations",
        annotation_format="pascal_voc",
    )
Loading

0 comments on commit 3ead5c9

Please sign in to comment.