Skip to content

Commit

Permalink
Allow download of frames extracted from videos & multi-slotted items
Browse files Browse the repository at this point in the history
  • Loading branch information
JBWilkie committed Nov 21, 2024
1 parent 59561dc commit d06e754
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 15 deletions.
43 changes: 41 additions & 2 deletions darwin/dataset/download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@
from rich.console import Console

import darwin.datatypes as dt
from darwin.dataset.utils import sanitize_filename
from darwin.dataset.utils import (
sanitize_filename,
SUPPORTED_IMAGE_EXTENSIONS,
SUPPORTED_VIDEO_EXTENSIONS,
)
from darwin.datatypes import AnnotationFile
from darwin.exceptions import MissingDependency
from darwin.utils import (
Expand Down Expand Up @@ -670,9 +674,44 @@ def _get_planned_image_paths(
return [images_path / filename]
else:
for slot in annotation.slots:
if len(slot.source_files) > 1:
# Check that the item is either a DICOM series or a frame extracted from a video
is_dicom_series = all(
source_file["file_name"].lower().endswith(".dcm") # type: ignore
for source_file in slot.source_files
)
is_extracted_frame = (
len(slot.source_files) == 2
and any(
source_file["file_name"].lower().endswith(ext) # type: ignore
for ext in SUPPORTED_VIDEO_EXTENSIONS
for source_file in slot.source_files
)
and any(
source_file["file_name"].lower().endswith(ext) # type: ignore
for ext in SUPPORTED_IMAGE_EXTENSIONS
for source_file in slot.source_files
)
)
if is_extracted_frame:
# Select only the image if it's an extracted frame
frame_source_file = next(
source_file
for source_file in slot.source_files
if any(
source_file["file_name"].lower().endswith(ext) # type: ignore
for ext in SUPPORTED_IMAGE_EXTENSIONS
)
)
slot.source_files = [frame_source_file]
if not is_dicom_series and not is_extracted_frame:
raise ValueError(
"This slot contains data that is not a DICOM series or a frame extracted from a video"
)

slot_name = Path(slot.name)
for source_file in slot.source_files:
file_name = source_file.file_name
file_name = source_file["file_name"] # type: ignore
if use_folders and annotation.remote_path != "/":
file_paths.append(
images_path
Expand Down
90 changes: 77 additions & 13 deletions tests/darwin/dataset/download_manager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,14 @@ def test_multi_slot_without_folders_planned_image_paths():
Slot(
name="slot1",
type="image",
source_files=[SourceFile(file_name="source_name_1.jpg")],
source_files=[
{"file_name": "source_name_1.jpg"},
], # type: ignore
),
Slot(
name="slot2",
type="image",
source_files=[SourceFile(file_name="source_name_2.jpg")],
source_files=[{"file_name": "source_name_2.jpg"}], # type: ignore
),
],
remote_path="/",
Expand All @@ -166,12 +168,12 @@ def test_multi_slot_with_folders_planned_image_path():
Slot(
name="slot1",
type="image",
source_files=[SourceFile(file_name="source_name_1.jpg")],
source_files=[{"file_name": "source_name_1.jpg"}], # type: ignore
),
Slot(
name="slot2",
type="image",
source_files=[SourceFile(file_name="source_name_2.jpg")],
source_files=[{"file_name": "source_name_2.jpg"}], # type: ignore
),
],
remote_path="/remote/path",
Expand Down Expand Up @@ -208,34 +210,96 @@ def test_single_slot_root_path_with_folders_planned_image_paths():
assert result == expected


def test_multiple_source_files_planned_image_paths():
def test_dicom_series_planned_image_paths():
annotation = AnnotationFile(
path=Path("/local/annotations/image.json"),
filename="image.jpg",
path=Path("/local/annotations/series.json"),
filename="series.dcm",
annotation_classes={
AnnotationClass(name="test_class", annotation_type="polygon")
},
annotations=[],
slots=[
Slot(
name="slot1",
type="image",
type="dicom",
source_files=[
SourceFile(file_name="source_name_1.jpg"),
SourceFile(file_name="source_name_2.jpg"),
],
{"file_name": "slice_1.dcm"},
{"file_name": "slice_2.dcm"},
{"file_name": "slice_3.dcm"},
], # type: ignore
)
],
remote_path="/",
)
images_path = Path("/local/images")
results = dm._get_planned_image_paths(annotation, images_path, use_folders=False)
expected = [
images_path / "image.jpg" / "slot1" / "source_name_1.jpg",
images_path / "image.jpg" / "slot1" / "source_name_2.jpg",
images_path / "series.dcm" / "slot1" / "slice_1.dcm",
images_path / "series.dcm" / "slot1" / "slice_2.dcm",
images_path / "series.dcm" / "slot1" / "slice_3.dcm",
]
assert results == expected


def test_extracted_frames_planned_image_paths():
    """A slot holding a video plus one image plans a path for the image only."""
    images_path = Path("/local/images")

    # Two source files in a single slot: one image frame and one video file.
    frame_slot = Slot(
        name="0",
        type="image",
        source_files=[
            {"file_name": "frame_0.jpg"},
            {"file_name": "video.mp4"},
        ],  # type: ignore
    )
    annotation = AnnotationFile(
        path=Path("/local/annotations/video.json"),
        filename="video.mp4",
        annotation_classes={
            AnnotationClass(name="test_class", annotation_type="polygon")
        },
        annotations=[],
        slots=[frame_slot],
        remote_path="/",
    )

    planned_paths = dm._get_planned_image_paths(
        annotation, images_path, use_folders=False
    )

    # Only the frame image is expected; no path is planned for video.mp4.
    assert planned_paths == [images_path / "video.mp4" / "0" / "frame_0.jpg"]


def test_multiple_source_files_raises_error():
    """Two non-DICOM images in one slot make path planning raise ValueError."""
    expected_message = (
        "This slot contains data that is not a DICOM series "
        "or a frame extracted from a video"
    )
    images_path = Path("/local/images")

    # Neither file is .dcm and neither is a video, so the slot matches no
    # supported multi-file layout.
    ambiguous_slot = Slot(
        name="slot1",
        type="image",
        source_files=[
            {"file_name": "image1.jpg"},
            {"file_name": "image2.jpg"},
        ],  # type: ignore
    )
    annotation = AnnotationFile(
        path=Path("/local/annotations/image.json"),
        filename="image.jpg",
        annotation_classes={
            AnnotationClass(name="test_class", annotation_type="polygon")
        },
        annotations=[],
        slots=[ambiguous_slot],
        remote_path="/",
    )

    with pytest.raises(ValueError) as exc_info:
        dm._get_planned_image_paths(annotation, images_path, use_folders=False)

    # Exact comparison: the full message text is part of the checked contract.
    assert str(exc_info.value) == expected_message


def test__remove_empty_directories(tmp_path: Path) -> None:
root_dir = tmp_path / "root"
root_dir.mkdir()
Expand Down

0 comments on commit d06e754

Please sign in to comment.