Skip to content

Commit

Permalink
Generic media support (#539)
Browse files Browse the repository at this point in the history
- Added `DatasetItem.media` to replace dedicated members for each media type
- Added the `PointCloud` media type
- Added the `media_type()` method to `Extractor`s
- Added merging for all media types; mixed media types within an item or in the dataset produce an error
- Datasets can't have mixed media types in items. If such a situation occurs, an error is raised (checked during dataset caching/iteration)
- Datasets can't change media type using transforms
- Extractors must report their media type with the `media_type()` method
- Added a new mandatory `media_type` argument to `Dataset.from_iterable`. It has a default value of `Image` for the transition period (to be tracked in #675).
- Deprecated `DatasetItem.image`, `.related_images`, `.point_cloud`, `save-images` and `require_images`
- Added deprecation messages about annotation classes in `components.extractor`
- Suppressed Datumaro deprecation messages when using Datumaro from CLI

Co-authored-by: yasakova-anastasia <[email protected]>
  • Loading branch information
Maxim Zhiltsov and yasakova-anastasia authored Mar 9, 2022
1 parent b30bf48 commit b7d83d5
Show file tree
Hide file tree
Showing 157 changed files with 3,362 additions and 2,267 deletions.
12 changes: 10 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## \[Unreleased\]
### Added
- TBD
- Support for custom media types, new `PointCloud` media type,
`DatasetItem.media` and `.media_as(type)` members
(<https://github.com/openvinotoolkit/datumaro/pull/539>)
- \[API\] A way to request dataset and extractor media type with `media_type`
(<https://github.com/openvinotoolkit/datumaro/pull/539>)

### Changed
- TBD

### Deprecated
- TBD
- `--save-images` is replaced with `--save-media` in CLI and converter API
(<https://github.com/openvinotoolkit/datumaro/pull/539>)
- \[API\] `image`, `point_cloud` and `related_images` of `DatasetItem` are
replaced with `media` and `media_as(type)` members and constructor parameters
(<https://github.com/openvinotoolkit/datumaro/pull/539>)

### Removed
- TBD
Expand Down
5 changes: 5 additions & 0 deletions datumaro/cli/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import logging as log
import os.path as osp
import sys
import warnings

from ..util.telemetry_utils import (
close_telemetry_session, init_telemetry_session,
Expand Down Expand Up @@ -39,6 +40,10 @@ def init_logger(cls, args=None):
log.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
level=args.loglevel)

# Suppress own deprecation warnings
warnings.filterwarnings('ignore', category=DeprecationWarning,
module=r'datumaro\..*')

@staticmethod
def _define_loglevel_option(parser):
parser.add_argument('--loglevel', type=loglevel, default='info',
Expand Down
2 changes: 1 addition & 1 deletion datumaro/cli/commands/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def build_parser(parser_ctor=argparse.ArgumentParser):
|n
Examples:|n
- Download the MNIST dataset:|n
|s|s%(prog)s -i tfds:mnist -- --save-images|n
|s|s%(prog)s -i tfds:mnist -- --save-media|n
|n
- Download the VOC 2012 dataset, saving only the annotations in the COCO
format into a specific directory:|n
Expand Down
12 changes: 6 additions & 6 deletions datumaro/cli/contexts/project/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,10 +371,10 @@ def filter_command(args):

# Source might be missing in the working dir, so we specify
# the output directory.
# We specify save_images here as a heuristic. It can probably
# We specify save_media here as a heuristic. It can probably
# be improved by checking if there are images in the dataset
# directory.
dataset.save(project.source_data_dir(target), save_images=True)
dataset.save(project.source_data_dir(target), save_media=True)

log.info("Finished")
else:
Expand All @@ -389,7 +389,7 @@ def filter_command(args):
dst_dir = osp.abspath(dst_dir)

dataset.filter(filter_expr, *filter_args)
dataset.save(dst_dir, save_images=True)
dataset.save(dst_dir, save_media=True)

log.info("Results have been saved to '%s'" % dst_dir)

Expand Down Expand Up @@ -557,10 +557,10 @@ def transform_command(args):

# Source might be missing in the working dir, so we specify
# the output directory
# We specify save_images here as a heuristic. It can probably
# We specify save_media here as a heuristic. It can probably
# be improved by checking if there are images in the dataset
# directory.
dataset.save(project.source_data_dir(target), save_images=True)
dataset.save(project.source_data_dir(target), save_media=True)

log.info("Finished")
else:
Expand All @@ -575,7 +575,7 @@ def transform_command(args):
dst_dir = osp.abspath(dst_dir)

dataset.transform(args.transform, **extra_args)
dataset.save(dst_dir, save_images=True)
dataset.save(dst_dir, save_media=True)

log.info("Results have been saved to '%s'" % dst_dir)

Expand Down
11 changes: 8 additions & 3 deletions datumaro/cli/contexts/project/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import cv2
import numpy as np

from datumaro.components.media import Image

with warnings.catch_warnings():
warnings.simplefilter("ignore")
import tensorboardX as tb
Expand Down Expand Up @@ -121,7 +123,10 @@ def save(self, a: IDataset, b: IDataset):
self.update_mask_confusion(mask_diff)

self.save_item_label_diff(item_a, item_b, label_diff)
self.save_item_bbox_diff(item_a, item_b, bbox_diff)

if a.media_type() and issubclass(a.media_type(), Image) and \
b.media_type() and issubclass(b.media_type(), Image):
self.save_item_bbox_diff(item_a, item_b, bbox_diff)

if len(self.label_confusion_matrix) != 0:
self.save_conf_matrix(self.label_confusion_matrix,
Expand Down Expand Up @@ -243,11 +248,11 @@ def save_item_bbox_diff(self, item_a, item_b, diff):
_, mispred, a_unmatched, b_unmatched = diff

if 0 < len(a_unmatched) + len(b_unmatched) + len(mispred):
if not item_a.has_image or not item_a.image.has_data:
if not isinstance(item_a.media, Image) or not item_a.media.has_data:
log.warning("Item %s: item has no image data, "
"it will be skipped" % (item_a.id))
return
img_a = item_a.image.data.copy()
img_a = item_a.media.data.copy()
img_b = img_a.copy()
for a_bbox, b_bbox in mispred:
self.draw_bbox(img_a, a_bbox, self.get_a_label(a_bbox.label),
Expand Down
49 changes: 36 additions & 13 deletions datumaro/components/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,17 @@
import os
import os.path as osp
import shutil
import warnings

from attrs import define, field
import attr

from datumaro.components.cli_plugin import CliPlugin
from datumaro.components.errors import (
AnnotationExportError, DatumaroError, ItemExportError,
AnnotationExportError, DatasetExportError, DatumaroError, ItemExportError,
)
from datumaro.components.extractor import DatasetItem, IExtractor
from datumaro.components.media import Image
from datumaro.components.media import Image, PointCloud
from datumaro.components.progress_reporting import (
NullProgressReporter, ProgressReporter,
)
Expand Down Expand Up @@ -89,8 +90,16 @@ class Converter(CliPlugin):
@classmethod
def build_cmdline_parser(cls, **kwargs):
parser = super().build_cmdline_parser(**kwargs)

# Deprecated
parser.add_argument('--save-images', action='store_true',
help="Save images (default: %(default)s)")
default=None,
help="(Deprecated. Use --save-media instead) "
"Save images (default: False)")

parser.add_argument('--save-media', action='store_true',
default=None, # TODO: remove default once save-images is removed
help="Save media (default: False)")
parser.add_argument('--image-ext', default=None,
help="Image extension (default: keep or use format default%s)" % \
(' ' + cls.DEFAULT_IMAGE_EXT if cls.DEFAULT_IMAGE_EXT else ''))
Expand Down Expand Up @@ -138,7 +147,8 @@ def apply(self):
raise NotImplementedError("Should be implemented in a subclass")

def __init__(self, extractor: IExtractor, save_dir: str, *,
save_images: bool = False,
save_images=None, # Deprecated
save_media: Optional[bool] = None,
image_ext: Optional[str] = None,
default_image_ext: Optional[str] = None,
save_dataset_meta: bool = False,
Expand All @@ -147,7 +157,20 @@ def __init__(self, extractor: IExtractor, save_dir: str, *,
assert default_image_ext
self._default_image_ext = default_image_ext

self._save_images = save_images
if save_images is not None and save_media is not None:
raise DatasetExportError("Can't use both 'save-media' and "
"'save-images'")

if save_media is not None:
self._save_media = save_media
elif save_images is not None:
self._save_media = save_images
warnings.warn("'save-images' is deprecated and will be "
"removed in future. Use 'save-media' instead.",
DeprecationWarning, stacklevel=2)
else:
self._save_media = False

self._image_ext = image_ext

self._extractor = extractor
Expand All @@ -168,8 +191,8 @@ def __init__(self, extractor: IExtractor, save_dir: str, *,
def _find_image_ext(self, item: Union[DatasetItem, Image]):
src_ext = None

if isinstance(item, DatasetItem) and item.has_image:
src_ext = item.image.ext
if isinstance(item, DatasetItem) and isinstance(item.media, Image):
src_ext = item.media.ext
elif isinstance(item, Image):
src_ext = item.ext

Expand All @@ -192,7 +215,7 @@ def _save_image(self, item, path=None, *,
assert not ((subdir or name or basedir) and path), \
"Can't use both subdir or name or basedir and path arguments"

if not item.has_image or not item.image.has_data:
if not isinstance(item.media, Image) or not item.media.has_data:
log.warning("Item '%s' has no image", item.id)
return

Expand All @@ -201,14 +224,14 @@ def _save_image(self, item, path=None, *,
self._make_image_filename(item, name=name, subdir=subdir))
path = osp.abspath(path)

item.image.save(path)
item.media.save(path)

def _save_point_cloud(self, item=None, path=None, *,
name=None, subdir=None, basedir=None):
assert not ((subdir or name or basedir) and path), \
"Can't use both subdir or name or basedir and path arguments"

if not item.point_cloud:
if not item.media or not isinstance(item.media, PointCloud):
log.warning("Item '%s' has no pcd", item.id)
return

Expand All @@ -218,9 +241,9 @@ def _save_point_cloud(self, item=None, path=None, *,
path = osp.abspath(path)

os.makedirs(osp.dirname(path), exist_ok=True)
if item.point_cloud and osp.isfile(item.point_cloud):
if item.point_cloud != path:
shutil.copyfile(item.point_cloud, path)
if item.media and osp.isfile(item.media.path):
if item.media.path != path:
shutil.copyfile(item.media.path, path)

def _save_meta_file(self, path):
save_meta_file(path, self._extractor.categories())
Loading

0 comments on commit b7d83d5

Please sign in to comment.