From 9c2909544ea7c404239b9abc4fc882d63cb2a52c Mon Sep 17 00:00:00 2001 From: Yauheni Kachan <19803638+bagxi@users.noreply.github.com> Date: Thu, 7 May 2020 10:36:45 +0300 Subject: [PATCH] move torchvision to catalyst[cv] (#738) * settings PoC - step 1 * fix codestyle * settings PoC - step 2 - replace env params * mv catalyst/utils/tools to catalyst/tools * mv cv to submodule * remove imread from 'SummaryReader' * fix imports * tests poc * fix requirements in test * fixes * fixes * fix code style, sort settings params * replace torchvision * fixes * fixes * check * Update requirements.txt * renaming * bin update * Update requirements.txt Co-authored-by: Sergey Kolesnikov --- CHANGELOG.md | 5 +- Makefile | 2 +- bin/tests/check_dl_core.sh | 2 +- bin/tests/check_dl_core_callbacks.sh | 6 +- bin/tests/check_dl_core_settings.sh | 191 ++++++++++++ .../_check_docs.sh => tests/check_docs.sh} | 0 catalyst/contrib/__main__.py | 4 +- catalyst/contrib/data/cv/__init__.py | 1 + catalyst/contrib/data/cv/reader.py | 92 ++++++ catalyst/contrib/data/dataset/README.md | 1 + catalyst/contrib/data/dataset/__init__.py | 8 + catalyst/contrib/data/dataset/mnist.py | 271 +++++++++++++++++ catalyst/contrib/data/dataset/transforms.py | 151 ++++++++++ catalyst/contrib/data/dataset/utils.py | 140 +++++++++ catalyst/contrib/dl/__init__.py | 5 - catalyst/contrib/dl/callbacks/__init__.py | 21 +- catalyst/contrib/dl/callbacks/inference.py | 126 ++++++++ catalyst/contrib/dl/callbacks/optimizer.py | 2 +- .../contrib/dl/callbacks/telegram_logger.py | 8 +- .../tests/test_optimizer_callback.py | 5 +- catalyst/contrib/nn/modules/lama.py | 2 +- catalyst/contrib/nn/schedulers/base.py | 2 +- catalyst/contrib/nn/schedulers/onecycle.py | 2 +- catalyst/contrib/registry.py | 16 +- catalyst/contrib/utils/__init__.py | 28 +- catalyst/contrib/utils/compression.py | 23 +- catalyst/contrib/utils/cv/__init__.py | 32 ++ catalyst/contrib/utils/{ => cv}/image.py | 78 +---- catalyst/contrib/utils/cv/tensor.py | 68 +++++ catalyst/contrib/utils/cv/tests/__init__.py | 0 .../utils/{ => cv}/tests/test_image.py | 4 +- catalyst/contrib/utils/nlp/__init__.py | 23 ++ catalyst/contrib/utils/{ => nlp}/text.py | 0 catalyst/contrib/utils/serialization.py | 16 +- catalyst/contrib/utils/tests/test_pandas.py | 2 +- catalyst/contrib/utils/tools/tensorboard.py | 21 +- .../utils/tools/tests/test_tensorboard.py | 31 +- catalyst/contrib/utils/visualization.py | 2 +- catalyst/core/callbacks/early_stop.py | 11 +- catalyst/core/callbacks/optimizer.py | 2 +- catalyst/core/callbacks/timer.py | 2 +- catalyst/core/experiment.py | 2 +- catalyst/core/registry.py | 2 +- catalyst/core/runner.py | 23 +- catalyst/core/state.py | 18 +- catalyst/data/__init__.py | 9 +- catalyst/data/__main__.py | 39 ++- catalyst/data/reader.py | 88 +----- catalyst/data/scripts/image2embedding.py | 37 ++- catalyst/data/scripts/process_images.py | 27 +- catalyst/data/scripts/split_dataframe.py | 10 +- catalyst/data/scripts/tag2label.py | 20 +- catalyst/data/scripts/text2embedding.py | 2 +- catalyst/dl/callbacks/__init__.py | 2 +- catalyst/dl/callbacks/inference.py | 122 +------- catalyst/dl/experiment/config.py | 2 +- catalyst/dl/experiment/core.py | 6 +- catalyst/dl/experiment/supervised.py | 2 +- catalyst/dl/registry.py | 2 +- catalyst/dl/runner/core.py | 2 +- catalyst/dl/runner/supervised.py | 2 +- catalyst/dl/scripts/trace.py | 2 +- catalyst/dl/utils/trace.py | 2 +- catalyst/{utils => }/tools/__init__.py | 1 + catalyst/{utils => }/tools/frozen_class.py | 0 catalyst/{utils => }/tools/registry.py 
| 0 catalyst/tools/settings.py | 280 ++++++++++++++++++ catalyst/{utils => }/tools/time_manager.py | 0 catalyst/{utils => }/tools/typing.py | 0 catalyst/utils/distributed.py | 2 +- catalyst/utils/metrics/accuracy.py | 2 +- catalyst/utils/metrics/dice.py | 2 +- catalyst/utils/metrics/f1_score.py | 2 +- catalyst/utils/metrics/iou.py | 2 +- catalyst/utils/tests/test_registry.py | 2 +- catalyst/utils/tools/settings.py | 11 - catalyst/utils/torch.py | 2 +- docs/api/contrib.rst | 9 +- docs/api/utils.rst | 8 +- docs/info/distributed.rst | 8 +- requirements/requirements-contrib.txt | 1 + requirements/requirements-cv.txt | 6 +- requirements/requirements.txt | 7 - tests/_tests_cv_classification/experiment.py | 14 +- .../test1/experiment.py | 14 +- .../test2/experiments/SimpleExperiment1.py | 14 +- .../test2/experiments/SimpleExperiment2.py | 14 +- .../experiment.py | 5 +- tests/_tests_dl_callbacks/experiment.py | 13 +- ...z_segmentation.py => cv_z_segmentation.py} | 24 +- .../{z_unets.py => cv_z_unets.py} | 0 ...assification.py => dl_z_classification.py} | 14 +- ...nctional.py => dl_z_contirb_functional.py} | 0 ...stributed_01.py => dl_z_distributed_01.py} | 0 ...stributed_02.py => dl_z_distributed_02.py} | 0 ...stributed_03.py => dl_z_distributed_03.py} | 0 ...stributed_04.py => dl_z_distributed_04.py} | 0 ...stributed_05.py => dl_z_distributed_05.py} | 0 ...stributed_06.py => dl_z_distributed_06.py} | 0 ...stributed_07.py => dl_z_distributed_07.py} | 0 ...stributed_08.py => dl_z_distributed_08.py} | 0 ...stributed_09.py => dl_z_distributed_09.py} | 0 ...stributed_10.py => dl_z_distributed_10.py} | 0 ...stributed_11.py => dl_z_distributed_11.py} | 0 ...stributed_12.py => dl_z_distributed_12.py} | 0 ...stributed_13.py => dl_z_distributed_13.py} | 0 ...stributed_14.py => dl_z_distributed_14.py} | 0 ...stributed_15.py => dl_z_distributed_15.py} | 0 ...stributed_16.py => dl_z_distributed_16.py} | 0 ...stributed_17.py => dl_z_distributed_17.py} | 0 ...ibuted_0.py => dl_z_docs_distributed_0.py} | 0 ...ibuted_1.py => dl_z_docs_distributed_1.py} | 0 ...ibuted_2.py => dl_z_docs_distributed_2.py} | 0 ...ibuted_3.py => dl_z_docs_distributed_3.py} | 0 ...ae.py => dl_z_mvp_distributed_mnist_ae.py} | 8 +- .../{z_mvp_mnist.py => dl_z_mvp_mnist.py} | 3 +- ...z_mvp_mnist_ae.py => dl_z_mvp_mnist_ae.py} | 13 +- ...mvp_mnist_gan.py => dl_z_mvp_mnist_gan.py} | 8 +- ...p_mnist_unet.py => dl_z_mvp_mnist_unet.py} | 13 +- ...mvp_mnist_vae.py => dl_z_mvp_mnist_vae.py} | 13 +- ...mvp_projector.py => dl_z_mvp_projector.py} | 0 121 files changed, 1665 insertions(+), 677 deletions(-) create mode 100644 bin/tests/check_dl_core_settings.sh rename bin/{codestyle/_check_docs.sh => tests/check_docs.sh} (100%) create mode 100644 catalyst/contrib/data/cv/reader.py create mode 100644 catalyst/contrib/data/dataset/README.md create mode 100644 catalyst/contrib/data/dataset/__init__.py create mode 100644 catalyst/contrib/data/dataset/mnist.py create mode 100644 catalyst/contrib/data/dataset/transforms.py create mode 100644 catalyst/contrib/data/dataset/utils.py create mode 100644 catalyst/contrib/dl/callbacks/inference.py create mode 100644 catalyst/contrib/utils/cv/__init__.py rename catalyst/contrib/utils/{ => cv}/image.py (69%) create mode 100644 catalyst/contrib/utils/cv/tensor.py create mode 100644 catalyst/contrib/utils/cv/tests/__init__.py rename catalyst/contrib/utils/{ => cv}/tests/test_image.py (92%) create mode 100644 catalyst/contrib/utils/nlp/__init__.py rename catalyst/contrib/utils/{ => nlp}/text.py (100%) rename 
catalyst/{utils => }/tools/__init__.py (81%) rename catalyst/{utils => }/tools/frozen_class.py (100%) rename catalyst/{utils => }/tools/registry.py (100%) create mode 100644 catalyst/tools/settings.py rename catalyst/{utils => }/tools/time_manager.py (100%) rename catalyst/{utils => }/tools/typing.py (100%) delete mode 100644 catalyst/utils/tools/settings.py rename tests/_tests_scripts/{z_segmentation.py => cv_z_segmentation.py} (90%) rename tests/_tests_scripts/{z_unets.py => cv_z_unets.py} (100%) rename tests/_tests_scripts/{z_classification.py => dl_z_classification.py} (96%) rename tests/_tests_scripts/{z_dl_contirb_functional.py => dl_z_contirb_functional.py} (100%) rename tests/_tests_scripts/{z_distributed_01.py => dl_z_distributed_01.py} (100%) rename tests/_tests_scripts/{z_distributed_02.py => dl_z_distributed_02.py} (100%) rename tests/_tests_scripts/{z_distributed_03.py => dl_z_distributed_03.py} (100%) rename tests/_tests_scripts/{z_distributed_04.py => dl_z_distributed_04.py} (100%) rename tests/_tests_scripts/{z_distributed_05.py => dl_z_distributed_05.py} (100%) rename tests/_tests_scripts/{z_distributed_06.py => dl_z_distributed_06.py} (100%) rename tests/_tests_scripts/{z_distributed_07.py => dl_z_distributed_07.py} (100%) rename tests/_tests_scripts/{z_distributed_08.py => dl_z_distributed_08.py} (100%) rename tests/_tests_scripts/{z_distributed_09.py => dl_z_distributed_09.py} (100%) rename tests/_tests_scripts/{z_distributed_10.py => dl_z_distributed_10.py} (100%) rename tests/_tests_scripts/{z_distributed_11.py => dl_z_distributed_11.py} (100%) rename tests/_tests_scripts/{z_distributed_12.py => dl_z_distributed_12.py} (100%) rename tests/_tests_scripts/{z_distributed_13.py => dl_z_distributed_13.py} (100%) rename tests/_tests_scripts/{z_distributed_14.py => dl_z_distributed_14.py} (100%) rename tests/_tests_scripts/{z_distributed_15.py => dl_z_distributed_15.py} (100%) rename tests/_tests_scripts/{z_distributed_16.py => dl_z_distributed_16.py} (100%) rename tests/_tests_scripts/{z_distributed_17.py => dl_z_distributed_17.py} (100%) rename tests/_tests_scripts/{z_docs_distributed_0.py => dl_z_docs_distributed_0.py} (100%) rename tests/_tests_scripts/{z_docs_distributed_1.py => dl_z_docs_distributed_1.py} (100%) rename tests/_tests_scripts/{z_docs_distributed_2.py => dl_z_docs_distributed_2.py} (100%) rename tests/_tests_scripts/{z_docs_distributed_3.py => dl_z_docs_distributed_3.py} (100%) rename tests/_tests_scripts/{z_mvp_distributed_mnist_ae.py => dl_z_mvp_distributed_mnist_ae.py} (93%) rename tests/_tests_scripts/{z_mvp_mnist.py => dl_z_mvp_mnist.py} (95%) rename tests/_tests_scripts/{z_mvp_mnist_ae.py => dl_z_mvp_mnist_ae.py} (86%) rename tests/_tests_scripts/{z_mvp_mnist_gan.py => dl_z_mvp_mnist_gan.py} (95%) rename tests/_tests_scripts/{z_mvp_mnist_unet.py => dl_z_mvp_mnist_unet.py} (86%) rename tests/_tests_scripts/{z_mvp_mnist_vae.py => dl_z_mvp_mnist_vae.py} (90%) rename tests/_tests_scripts/{z_mvp_projector.py => dl_z_mvp_projector.py} (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 045d93b8b9..541b261fab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - -## [20.05] - YYYY-MM-DD +## [20.05] - 2020-05-07 ### Added @@ -46,6 +46,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Github actions CI was updated ([#754](https://github.com/catalyst-team/catalyst/pull/754)) - Changed default `num_epochs` to 1 for `State` ([#756](https://github.com/catalyst-team/catalyst/pull/756)) - Changed `state.batch_in`/`state.batch_out` to `state.input`/`state.output` ([#763](https://github.com/catalyst-team/catalyst/pull/763)) +- Moved `torchvision` dependency from `catalyst` to `catalyst[cv]` ([#738](https://github.com/catalyst-team/catalyst/pull/738))) ### Removed @@ -64,7 +65,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed Dockerfile dependency ([#780](https://github.com/catalyst-team/catalyst/pull/780)) -## [20.04] - 2020-04-21 +## [20.04] - 2020-04-06 ### Added diff --git a/Makefile b/Makefile index 2781015ea1..b1df4865eb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: check-docs docker docker-fp16 docker-dev docker-dev-fp16 install-from-source clean check-docs: - bash ./bin/codestyle/_check_docs.sh + bash ./bin/tests/check_docs.sh docker: ./requirements/ echo building $${REPO_NAME:-catalyst-base}:$${TAG:-latest} ... diff --git a/bin/tests/check_dl_core.sh b/bin/tests/check_dl_core.sh index 5ed3e8a5f9..b276dc3363 100755 --- a/bin/tests/check_dl_core.sh +++ b/bin/tests/check_dl_core.sh @@ -13,7 +13,7 @@ rm -rf ./tests/logs # (set -e; for f in tests/_tests_scripts/*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) (set -e; for f in tests/_tests_scripts/core_*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) (set -e; for f in tests/_tests_scripts/dl_*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) -(set -e; for f in tests/_tests_scripts/z_*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) +#(set -e; for f in tests/_tests_scripts/z_*.py; do PYTHONPATH=./catalyst:${PYTHONPATH} python "$f"; done) ################################ pipeline 99 ################################ diff --git a/bin/tests/check_dl_core_callbacks.sh b/bin/tests/check_dl_core_callbacks.sh index 1319a42e06..6f56af8cc8 100644 --- a/bin/tests/check_dl_core_callbacks.sh +++ b/bin/tests/check_dl_core_callbacks.sh @@ -39,7 +39,7 @@ function check_checkpoints { ################################ pipeline 00 ################################ -# checking dafult parameters of checkpoint and one stage +# checking default parameters of checkpoint and one stage LOG_MSG='pipeline 00' echo ${LOG_MSG} @@ -295,7 +295,7 @@ rm -rf ${LOGDIR} ################################ pipeline 09 ################################ -# checking with one checkpoint and two stages +# checking with one checkpoint and two stages # with different ''load_on_stage_end'' options LOG_MSG='pipeline 09' echo ${LOG_MSG} @@ -325,7 +325,7 @@ rm -rf ${LOGDIR} ################################ pipeline 10 ################################ -# checking with three checkpoints and two stages +# checking with three checkpoints and two stages # with different ''load_on_stage_end'' options LOG_MSG='pipeline 10' echo ${LOG_MSG} diff --git a/bin/tests/check_dl_core_settings.sh b/bin/tests/check_dl_core_settings.sh new file mode 100644 index 0000000000..b392831e73 --- /dev/null +++ b/bin/tests/check_dl_core_settings.sh @@ -0,0 +1,191 @@ +#!/usr/bin/env bash + +# Cause the script to exit if a single command fails +set -eo pipefail -v + +pip uninstall -r requirements/requirements-contrib.txt -y +pip uninstall -r requirements/requirements-cv.txt -y +pip uninstall -r requirements/requirements-ecosystem.txt -y +pip uninstall -r requirements/requirements-ml.txt -y +pip 
uninstall -r requirements/requirements-nlp.txt -y
+pip install -r requirements/requirements.txt
+
+################################ pipeline 00 ################################
+# checking catalyst-core loading (default)
+cat <<EOT > .catalyst
+[catalyst]
+contrib_required = false
+cv_required = false
+ml_required = false
+nlp_required = false
+EOT
+
+python -c """
+from catalyst.contrib.dl import callbacks
+from catalyst.contrib import utils
+
+try:
+    callbacks.AlchemyLogger
+except (AttributeError, ImportError):
+    pass  # Ok
+else:
+    raise AssertionError('\'ImportError\' expected')
+"""
+
+
+################################ pipeline 01 ################################
+# checking catalyst-contrib dependencies loading
+cat <<EOT > .catalyst
+[catalyst]
+contrib_required = true
+cv_required = false
+ml_required = false
+nlp_required = false
+EOT
+
+# check that import fails if requirements are not installed
+python -c """
+from catalyst.tools import settings
+
+assert settings.use_lz4 == False and settings.use_pyarrow == False
+
+try:
+    from catalyst.contrib.dl.callbacks import AlchemyLogger, VisdomLogger
+except ImportError:
+    pass  # Ok
+else:
+    raise AssertionError('\'ImportError\' expected')
+"""
+
+pip install -r requirements/requirements-contrib.txt
+pip install -r requirements/requirements-ecosystem.txt
+
+python -c """
+from catalyst.contrib.dl.callbacks import AlchemyLogger, VisdomLogger
+"""
+
+
+################################ pipeline 02 ################################
+# checking catalyst-cv dependencies loading
+cat <<EOT > .catalyst
+[catalyst]
+contrib_required = false
+cv_required = true
+ml_required = false
+nlp_required = false
+EOT
+
+# check that import fails if requirements are not installed
+python -c """
+from catalyst.tools import settings
+
+assert settings.use_libjpeg_turbo == False
+
+try:
+    from catalyst.contrib.data import cv as cv_data
+    from catalyst.contrib.dl.callbacks import InferMaskCallback
+    from catalyst.contrib.models import cv as cv_models
+    from catalyst.contrib.utils import imread, imwrite
+    from catalyst.data.__main__ import COMMANDS
+
+    assert not (
+        'process-images' in COMMANDS
+        or 'process-images' in COMMANDS
+        or 'project-embeddings' in COMMANDS
+    )
+except (ImportError, AssertionError):
+    pass  # Ok
+else:
+    raise AssertionError('\'ImportError\' or \'AssertionError\' expected')
+"""
+
+pip install -r requirements/requirements-cv.txt
+
+python -c """
+from catalyst.contrib.data import cv as cv_data
+from catalyst.contrib.dl.callbacks import InferMaskCallback
+from catalyst.contrib.models import cv as cv_models
+from catalyst.contrib.utils import imread, imwrite
+from catalyst.data.__main__ import COMMANDS
+
+assert (
+    'process-images' in COMMANDS
+    and 'process-images' in COMMANDS
+    and 'project-embeddings' in COMMANDS
+)
+"""
+
+
+################################ pipeline 03 ################################
+# checking catalyst-ml dependencies loading
+cat <<EOT > .catalyst
+[catalyst]
+contrib_required = false
+cv_required = false
+ml_required = true
+nlp_required = false
+EOT
+
+# check that import fails if requirements are not installed
+python -c """
+try:
+    from catalyst.contrib.__main__ import COMMANDS
+
+    assert not (
+        'check-index-model' in COMMANDS or 'create-index-model' in COMMANDS
+    )
+except (ImportError, AssertionError):
+    pass  # Ok
+else:
+    raise AssertionError('\'ImportError\' or \'AssertionError\' expected')
+"""
+
+pip install -r requirements/requirements-ml.txt
+
+python -c """
+from catalyst.contrib.__main__ import COMMANDS
+
+assert 'check-index-model' in COMMANDS and 'create-index-model' in COMMANDS
+"""
+
+
+################################ pipeline 04 ################################
+# checking catalyst-nlp dependencies loading
+cat <<EOT > .catalyst
+[catalyst]
+contrib_required = false
+cv_required = false
+ml_required = false
+nlp_required = true
+EOT
+
+# check that import fails if requirements are not installed
+python -c """
+try:
+    from catalyst.contrib.data import nlp as nlp_data
+    from catalyst.contrib.models import nlp as nlp_models
+    from catalyst.contrib.utils import tokenize_text, process_bert_output
+    from catalyst.contrib.__main__ import COMMANDS as CONTRIB_SCRIPTS
+    from catalyst.data.__main__ import COMMANDS
+
+    assert 'text2embedding' not in COMMANDS
+except (ImportError, AssertionError):
+    pass  # Ok
+else:
+    raise AssertionError('\'ImportError\' or \'AssertionError\' expected')
+"""
+
+pip install -r requirements/requirements-nlp.txt
+
+python -c """
+from catalyst.contrib.data import nlp as nlp_data
+from catalyst.contrib.models import nlp as nlp_models
+from catalyst.contrib.utils import tokenize_text, process_bert_output
+from catalyst.data.__main__ import COMMANDS
+
+assert 'text2embedding' in COMMANDS
+"""
+
+
+################################ pipeline 99 ################################
+rm .catalyst
diff --git a/bin/codestyle/_check_docs.sh b/bin/tests/check_docs.sh
similarity index 100%
rename from bin/codestyle/_check_docs.sh
rename to bin/tests/check_docs.sh
diff --git a/catalyst/contrib/__main__.py b/catalyst/contrib/__main__.py
index eefe0ea377..846cc9edc7 100644
--- a/catalyst/contrib/__main__.py
+++ b/catalyst/contrib/__main__.py
@@ -1,9 +1,9 @@
 from argparse import ArgumentParser, RawTextHelpFormatter
 from collections import OrderedDict
 import logging
-import os
 
 from catalyst.contrib.scripts import collect_env, find_thresholds
+from catalyst.tools import settings
 
 logger = logging.getLogger(__name__)
 
@@ -18,7 +18,7 @@
     COMMANDS["check-index-model"] = check_index_model
     COMMANDS["create-index-model"] = create_index_model
 except ImportError as ex:
-    if os.environ.get("USE_NMSLIB", "0") == "1":
+    if settings.nmslib_required:
         logger.warning(
             "nmslib not available, to install nmslib,"
             " run `pip install nmslib`."
diff --git a/catalyst/contrib/data/cv/__init__.py b/catalyst/contrib/data/cv/__init__.py
index d41944fb13..50dc4b2084 100644
--- a/catalyst/contrib/data/cv/__init__.py
+++ b/catalyst/contrib/data/cv/__init__.py
@@ -1,4 +1,5 @@
 # flake8: noqa
 
 from .mixins import *
+from .reader import *
 from .transforms import *
diff --git a/catalyst/contrib/data/cv/reader.py b/catalyst/contrib/data/cv/reader.py
new file mode 100644
index 0000000000..263f121867
--- /dev/null
+++ b/catalyst/contrib/data/cv/reader.py
@@ -0,0 +1,92 @@
+from typing import Tuple, Union
+
+from catalyst import utils
+from catalyst.data.reader import ReaderSpec
+
+
+class ImageReader(ReaderSpec):
+    """Image reader abstraction. Reads images from a ``csv`` dataset."""
+
+    def __init__(
+        self,
+        input_key: str,
+        output_key: str,
+        rootpath: str = None,
+        grayscale: bool = False,
+    ):
+        """
+        Args:
+            input_key (str): key to use from annotation dict
+            output_key (str): key to use to store the result
+            rootpath (str): path to images dataset root directory
+                (so you can use relative paths in annotations)
+            grayscale (bool): flag if you need to work only
+                with grayscale images
+        """
+        super().__init__(input_key, output_key)
+        self.rootpath = rootpath
+        self.grayscale = grayscale
+
+    def __call__(self, element):
+        """Reads a row from your annotations dict with filename and
+        transfers it to an image
+
+        Args:
+            element: elem in your dataset
+
+        Returns:
+            np.ndarray: Image
+        """
+        image_name = str(element[self.input_key])
+        img = utils.imread(
+            image_name, rootpath=self.rootpath, grayscale=self.grayscale
+        )
+
+        output = {self.output_key: img}
+        return output
+
+
+class MaskReader(ReaderSpec):
+    """Mask reader abstraction. Reads masks from a `csv` dataset."""
+
+    def __init__(
+        self,
+        input_key: str,
+        output_key: str,
+        rootpath: str = None,
+        clip_range: Tuple[Union[int, float], Union[int, float]] = (0, 1),
+    ):
+        """
+        Args:
+            input_key (str): key to use from annotation dict
+            output_key (str): key to use to store the result
+            rootpath (str): path to images dataset root directory
+                (so you can use relative paths in annotations)
+            clip_range (Tuple[int, int]): lower and upper interval edges,
+                image values outside the interval are clipped
+                to the interval edges
+        """
+        super().__init__(input_key, output_key)
+        self.rootpath = rootpath
+        self.clip = clip_range
+
+    def __call__(self, element):
+        """Reads a row from your annotations dict with filename and
+        transfers it to a mask
+
+        Args:
+            element: elem in your dataset.
+
+        Returns:
+            np.ndarray: Mask
+        """
+        mask_name = str(element[self.input_key])
+        mask = utils.mimread(
+            mask_name, rootpath=self.rootpath, clip_range=self.clip
+        )
+
+        output = {self.output_key: mask}
+        return output
+
+
+__all__ = ["ImageReader", "MaskReader"]
diff --git a/catalyst/contrib/data/dataset/README.md b/catalyst/contrib/data/dataset/README.md
new file mode 100644
index 0000000000..d3205dd2cf
--- /dev/null
+++ b/catalyst/contrib/data/dataset/README.md
@@ -0,0 +1 @@
+This subpackage was borrowed from [torchvision](https://github.com/pytorch/vision).
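For reference, a minimal usage sketch of the new readers (the keys and file paths below are hypothetical, illustrative values, not taken from the patch):

    from catalyst.contrib.data.cv import ImageReader, MaskReader

    # each reader maps one annotation row (a dict) to {output_key: np.ndarray}
    image_reader = ImageReader(
        input_key="image", output_key="features", rootpath="data/images"
    )
    mask_reader = MaskReader(
        input_key="mask", output_key="targets", rootpath="data/masks"
    )

    row = {"image": "0001.jpg", "mask": "0001.png"}  # one annotation row
    sample = {**image_reader(row), **mask_reader(row)}
    # sample["features"] and sample["targets"] are np.ndarray values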
diff --git a/catalyst/contrib/data/dataset/__init__.py b/catalyst/contrib/data/dataset/__init__.py
new file mode 100644
index 0000000000..d4baedca8b
--- /dev/null
+++ b/catalyst/contrib/data/dataset/__init__.py
@@ -0,0 +1,8 @@
+# flake8: noqa
+
+from catalyst.contrib.data.dataset.mnist import MNIST
+from catalyst.contrib.data.dataset.transforms import (
+    Compose,
+    Normalize,
+    ToTensor,
+)
diff --git a/catalyst/contrib/data/dataset/mnist.py b/catalyst/contrib/data/dataset/mnist.py
new file mode 100644
index 0000000000..054cbaea01
--- /dev/null
+++ b/catalyst/contrib/data/dataset/mnist.py
@@ -0,0 +1,271 @@
+import codecs
+import os
+
+import numpy as np
+
+import torch
+from torch.utils.data import Dataset
+
+from catalyst.contrib.data.dataset.utils import download_and_extract_archive
+
+
+class MNIST(Dataset):
+    """`MNIST <http://yann.lecun.com/exdb/mnist/>`_ Dataset."""
+
+    _repr_indent = 4
+
+    resources = [
+        (
+            "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",
+            "f68b3c2dcbeaaa9fbdd348bbdeb94873",
+        ),
+        (
+            "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",
+            "d53e105ee54ea40749a09fcbcd1e9432",
+        ),
+        (
+            "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",
+            "9fb629c4189551a2d022fa330f9573f3",
+        ),
+        (
+            "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",
+            "ec29112dd5afa0611ce80d1b7f02629c",
+        ),
+    ]
+
+    training_file = "training.pt"
+    test_file = "test.pt"
+    classes = [
+        "0 - zero",
+        "1 - one",
+        "2 - two",
+        "3 - three",
+        "4 - four",
+        "5 - five",
+        "6 - six",
+        "7 - seven",
+        "8 - eight",
+        "9 - nine",
+    ]
+
+    def __init__(
+        self,
+        root,
+        train=True,
+        transform=None,
+        target_transform=None,
+        download=False,
+    ):
+        """
+        Args:
+            root (string): Root directory of dataset where
+                ``MNIST/processed/training.pt``
+                and ``MNIST/processed/test.pt`` exist.
+            train (bool, optional): If True, creates dataset from
+                ``training.pt``, otherwise from ``test.pt``.
+            download (bool, optional): If true, downloads the dataset from
+                the internet and puts it in root directory. If dataset
+                is already downloaded, it is not downloaded again.
+            transform (callable, optional): A function/transform that
+                takes in an image and returns a transformed version.
+            target_transform (callable, optional): A function/transform
+                that takes in the target and transforms it.
+        """
+        if isinstance(root, torch._six.string_classes):
+            root = os.path.expanduser(root)
+        self.root = root
+        self.train = train  # training set or test set
+        self.transform = transform
+        self.target_transform = target_transform
+
+        if download:
+            self.download()
+
+        if not self._check_exists():
+            raise RuntimeError(
+                "Dataset not found. You can use download=True to download it"
+            )
+
+        if self.train:
+            data_file = self.training_file
+        else:
+            data_file = self.test_file
+        self.data, self.targets = torch.load(
+            os.path.join(self.processed_folder, data_file)
+        )
+
+    def __getitem__(self, index):
+        """
+        Args:
+            index (int): Index
+
+        Returns:
+            tuple: (image, target) where target is index of the target class.
+        """
+        img, target = self.data[index].numpy(), int(self.targets[index])
+
+        if self.transform is not None:
+            img = self.transform(img)
+
+        if self.target_transform is not None:
+            target = self.target_transform(target)
+
+        return img, target
+
+    def __len__(self):
+        """@TODO: Docs. Contribution is welcome."""
+        return len(self.data)
+
+    def __repr__(self):
+        """@TODO: Docs.
Contribution is welcome.""" + head = "Dataset " + self.__class__.__name__ + body = ["Number of datapoints: {}".format(self.__len__())] + if self.root is not None: + body.append("Root location: {}".format(self.root)) + body += self.extra_repr().splitlines() + if hasattr(self, "transforms") and self.transforms is not None: + body += [repr(self.transforms)] + lines = [head] + [" " * self._repr_indent + line for line in body] + return "\n".join(lines) + + @property + def raw_folder(self): + """@TODO: Docs. Contribution is welcome.""" + return os.path.join(self.root, self.__class__.__name__, "raw") + + @property + def processed_folder(self): + """@TODO: Docs. Contribution is welcome.""" + return os.path.join(self.root, self.__class__.__name__, "processed") + + @property + def class_to_idx(self): + """@TODO: Docs. Contribution is welcome.""" + return {_class: i for i, _class in enumerate(self.classes)} + + def _check_exists(self): + return os.path.exists( + os.path.join(self.processed_folder, self.training_file) + ) and os.path.exists( + os.path.join(self.processed_folder, self.test_file) + ) + + def download(self): + """Download the MNIST data if it doesn't exist in processed_folder.""" + if self._check_exists(): + return + + os.makedirs(self.raw_folder, exist_ok=True) + os.makedirs(self.processed_folder, exist_ok=True) + + # download files + for url, md5 in self.resources: + filename = url.rpartition("/")[2] + download_and_extract_archive( + url, download_root=self.raw_folder, filename=filename, md5=md5 + ) + + # process and save as torch files + print("Processing...") + + training_set = ( + read_image_file( + os.path.join(self.raw_folder, "train-images-idx3-ubyte") + ), + read_label_file( + os.path.join(self.raw_folder, "train-labels-idx1-ubyte") + ), + ) + test_set = ( + read_image_file( + os.path.join(self.raw_folder, "t10k-images-idx3-ubyte") + ), + read_label_file( + os.path.join(self.raw_folder, "t10k-labels-idx1-ubyte") + ), + ) + with open( + os.path.join(self.processed_folder, self.training_file), "wb" + ) as f: + torch.save(training_set, f) + with open( + os.path.join(self.processed_folder, self.test_file), "wb" + ) as f: + torch.save(test_set, f) + + print("Done!") + + def extra_repr(self): + """@TODO: Docs. Contribution is welcome.""" + return "Split: {}".format("Train" if self.train is True else "Test") + + +def get_int(b): + """@TODO: Docs. Contribution is welcome.""" + return int(codecs.encode(b, "hex"), 16) + + +def open_maybe_compressed_file(path): + """Return a file object that possibly decompresses 'path' on the fly. + Decompression occurs when argument `path` is a string + and ends with '.gz' or '.xz'. + """ + if not isinstance(path, torch._six.string_classes): + return path + if path.endswith(".gz"): + import gzip + + return gzip.open(path, "rb") + if path.endswith(".xz"): + import lzma + + return lzma.open(path, "rb") + return open(path, "rb") + + +def read_sn3_pascalvincent_tensor(path, strict=True): + """Read a SN3 file in "Pascal Vincent" format. + Argument may be a filename, compressed filename, or file object. 
+ """ + # typemap + if not hasattr(read_sn3_pascalvincent_tensor, "typemap"): + read_sn3_pascalvincent_tensor.typemap = { + 8: (torch.uint8, np.uint8, np.uint8), + 9: (torch.int8, np.int8, np.int8), + 11: (torch.int16, np.dtype(">i2"), "i2"), + 12: (torch.int32, np.dtype(">i4"), "i4"), + 13: (torch.float32, np.dtype(">f4"), "f4"), + 14: (torch.float64, np.dtype(">f8"), "f8"), + } + # read + with open_maybe_compressed_file(path) as f: + data = f.read() + # parse + magic = get_int(data[0:4]) + nd = magic % 256 + ty = magic // 256 + assert nd >= 1 and nd <= 3 + assert ty >= 8 and ty <= 14 + m = read_sn3_pascalvincent_tensor.typemap[ty] + s = [get_int(data[4 * (i + 1) : 4 * (i + 2)]) for i in range(nd)] + parsed = np.frombuffer(data, dtype=m[1], offset=(4 * (nd + 1))) + assert parsed.shape[0] == np.prod(s) or not strict + return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s) + + +def read_label_file(path): + """@TODO: Docs. Contribution is welcome.""" + with open(path, "rb") as f: + x = read_sn3_pascalvincent_tensor(f, strict=False) + assert x.dtype == torch.uint8 + assert x.ndimension() == 1 + return x.long() + + +def read_image_file(path): + """@TODO: Docs. Contribution is welcome.""" + with open(path, "rb") as f: + x = read_sn3_pascalvincent_tensor(f, strict=False) + assert x.dtype == torch.uint8 + assert x.ndimension() == 3 + return x diff --git a/catalyst/contrib/data/dataset/transforms.py b/catalyst/contrib/data/dataset/transforms.py new file mode 100644 index 0000000000..dc12626d1d --- /dev/null +++ b/catalyst/contrib/data/dataset/transforms.py @@ -0,0 +1,151 @@ +import numpy as np + +import torch + + +def to_tensor(pic: np.ndarray) -> torch.Tensor: + """Convert ``numpy.ndarray`` to tensor. + + Args: + pic (PIL Image or numpy.ndarray): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + if not isinstance(pic, np.ndarray): + raise TypeError(f"pic should be ndarray. Got {type(pic)}") + if pic.ndim not in {2, 3}: + raise ValueError( + f"pic should be 2/3 dimensional. Got {pic.ndim} dimensions." + ) + + if pic.ndim == 2: + pic = pic[:, :, None] + + img = torch.from_numpy(pic.transpose((2, 0, 1))) + # backward compatibility + if isinstance(img, torch.ByteTensor): + return img.float().div(255) + return img + + +def normalize(tensor, mean, std, inplace=False): + """Normalize a tensor image with mean and standard deviation. + + .. note:: + This transform acts out of place by default, i.e., + it does not mutates the input tensor. + + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + mean (sequence): Sequence of means for each channel. + std (sequence): Sequence of standard deviations for each channel. + inplace(bool,optional): Bool to make this operation inplace. + + Returns: + Tensor: Normalized Tensor image. + """ + if not (torch.is_tensor(tensor) and tensor.ndimension() == 3): + raise TypeError("tensor is not a torch image.") + + if not inplace: + tensor = tensor.clone() + + dtype = tensor.dtype + mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device) + std = torch.as_tensor(std, dtype=dtype, device=tensor.device) + tensor.sub_(mean[:, None, None]).div_(std[:, None, None]) + return tensor + + +class Compose: + """Composes several transforms together.""" + + def __init__(self, transforms): + """ + Args: + transforms (List): list of transforms to compose. + + Example: + >>> Compose([ToTensor(), Normalize()]) + """ + self.transforms = transforms + + def __call__(self, img): + """@TODO: Docs. 
Contribution is welcome.""" + for t in self.transforms: + img = t(img) + return img + + def __repr__(self): + """@TODO: Docs. Contribution is welcome.""" + format_string = self.__class__.__name__ + "(" + for t in self.transforms: + format_string += "\n" + format_string += " {0}".format(t) + format_string += "\n)" + return format_string + + +class ToTensor(object): + """Convert a ``numpy.ndarray`` to tensor. + Converts numpy.ndarray (H x W x C) in the range [0, 255] to a + torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] + if the numpy.ndarray has dtype = np.uint8 + In the other cases, tensors are returned without scaling. + """ + + def __call__(self, pic): + """ + Args: + pic (PIL Image or numpy.ndarray): Image to be converted to tensor. + + Returns: + Tensor: Converted image. + """ + return to_tensor(pic) + + def __repr__(self): + """@TODO: Docs. Contribution is welcome.""" + return self.__class__.__name__ + "()" + + +class Normalize(object): + """Normalize a tensor image with mean and standard deviation. + + Given mean: ``(mean[1],...,mean[n])`` and std: ``(std[1],..,std[n])`` + for ``n`` channels, this transform will normalize each channel of the input + ``torch.*Tensor`` i.e., + ``output[channel] = (input[channel] - mean[channel]) / std[channel]`` + + .. note:: + This transform acts out of place, i.e., + it does not mutate the input tensor. + """ + + def __init__(self, mean, std, inplace=False): + """ + Args: + mean (sequence): Sequence of means for each channel. + std (sequence): Sequence of standard deviations for each channel. + inplace(bool,optional): Bool to make this operation in-place. + """ + self.mean = mean + self.std = std + self.inplace = inplace + + def __call__(self, tensor): + """ + Args: + tensor (Tensor): Tensor image of size (C, H, W) to be normalized. + + Returns: + Tensor: Normalized Tensor image. + """ + return normalize(tensor, self.mean, self.std, self.inplace) + + def __repr__(self): + """@TODO: Docs. Contribution is welcome.""" + return self.__class__.__name__ + "(mean={0}, std={1})".format( + self.mean, self.std + ) diff --git a/catalyst/contrib/data/dataset/utils.py b/catalyst/contrib/data/dataset/utils.py new file mode 100644 index 0000000000..15ae1fee41 --- /dev/null +++ b/catalyst/contrib/data/dataset/utils.py @@ -0,0 +1,140 @@ +import gzip +import hashlib +import os +import tarfile +import zipfile + +from torch.utils.model_zoo import tqdm + + +def gen_bar_updater(): + """@TODO: Docs. Contribution is welcome.""" + pbar = tqdm(total=None) + + def bar_update(count, block_size, total_size): + if pbar.total is None and total_size: + pbar.total = total_size + progress_bytes = count * block_size + pbar.update(progress_bytes - pbar.n) + + return bar_update + + +def calculate_md5(fpath, chunk_size=1024 * 1024): + """@TODO: Docs. Contribution is welcome.""" + md5 = hashlib.md5() + with open(fpath, "rb") as f: + for chunk in iter(lambda: f.read(chunk_size), b""): + md5.update(chunk) + return md5.hexdigest() + + +def check_md5(fpath, md5, **kwargs): + """@TODO: Docs. Contribution is welcome.""" + return md5 == calculate_md5(fpath, **kwargs) + + +def check_integrity(fpath, md5=None): + """@TODO: Docs. Contribution is welcome.""" + if not os.path.isfile(fpath): + return False + if md5 is None: + return True + return check_md5(fpath, md5) + + +def download_url(url, root, filename=None, md5=None): + """Download a file from a url and place it in root. 
+ + Args: + url (str): URL to download file from + root (str): Directory to place downloaded file in + filename (str, optional): Name to save the file under. + If None, use the basename of the URL + md5 (str, optional): MD5 checksum of the download. + If None, do not check + """ + import urllib + + root = os.path.expanduser(root) + if not filename: + filename = os.path.basename(url) + fpath = os.path.join(root, filename) + + os.makedirs(root, exist_ok=True) + + # check if file is already present locally + if check_integrity(fpath, md5): + print("Using downloaded and verified file: " + fpath) + else: # download the file + try: + print("Downloading " + url + " to " + fpath) + urllib.request.urlretrieve( + url, fpath, reporthook=gen_bar_updater() + ) + except (urllib.error.URLError, IOError) as e: + if url[:5] == "https": + url = url.replace("https:", "http:") + print( + "Failed download. Trying https -> http instead." + " Downloading " + url + " to " + fpath + ) + urllib.request.urlretrieve( + url, fpath, reporthook=gen_bar_updater() + ) + else: + raise e + # check integrity of downloaded file + if not check_integrity(fpath, md5): + raise RuntimeError("File not found or corrupted.") + + +def extract_archive(from_path, to_path=None, remove_finished=False): + """@TODO: Docs. Contribution is welcome.""" + if to_path is None: + to_path = os.path.dirname(from_path) + + if from_path.endswith(".tar"): + with tarfile.open(from_path, "r") as tar: + tar.extractall(path=to_path) + elif from_path.endswith(".tar.gz") or from_path.endswith(".tgz"): + with tarfile.open(from_path, "r:gz") as tar: + tar.extractall(path=to_path) + elif from_path.endswith(".tar.xz"): + with tarfile.open(from_path, "r:xz") as tar: + tar.extractall(path=to_path) + elif from_path.endswith(".gz"): + root, _ = os.path.splitext(os.path.basename(from_path)) + to_path = os.path.join(to_path, root) + with open(to_path, "wb") as out_f, gzip.GzipFile(from_path) as zip_f: + out_f.write(zip_f.read()) + elif from_path.endswith(".zip"): + with zipfile.ZipFile(from_path, "r") as z: + z.extractall(to_path) + else: + raise ValueError(f"Extraction of {from_path} not supported") + + if remove_finished: + os.remove(from_path) + + +def download_and_extract_archive( + url, + download_root, + extract_root=None, + filename=None, + md5=None, + remove_finished=False, +): + """@TODO: Docs. 
Contribution is welcome.""" + download_root = os.path.expanduser(download_root) + if extract_root is None: + extract_root = download_root + if not filename: + filename = os.path.basename(url) + + download_url(url, download_root, filename, md5) + + archive = os.path.join(download_root, filename) + print(f"Extracting {archive} to {extract_root}") + extract_archive(archive, extract_root, remove_finished) diff --git a/catalyst/contrib/dl/__init__.py b/catalyst/contrib/dl/__init__.py index e3698163c5..e69de29bb2 100644 --- a/catalyst/contrib/dl/__init__.py +++ b/catalyst/contrib/dl/__init__.py @@ -1,5 +0,0 @@ -# flake8: noqa - -# from .callbacks import * -# from .experiment import * -# from .runner import * diff --git a/catalyst/contrib/dl/callbacks/__init__.py b/catalyst/contrib/dl/callbacks/__init__.py index b35c964238..be4097c222 100644 --- a/catalyst/contrib/dl/callbacks/__init__.py +++ b/catalyst/contrib/dl/callbacks/__init__.py @@ -2,6 +2,8 @@ import logging import os +from catalyst.tools import settings + from .cutmix_callback import CutmixCallback from .knn import KNNMetricCallback from .optimizer import SaveModelGradsCallback @@ -9,11 +11,22 @@ logger = logging.getLogger(__name__) +try: + import imageio + from .inference import InferMaskCallback +except ImportError as ex: + if settings.cv_required: + logger.warning( + "some of catalyst-cv dependencies not available," + " to install dependencies, run `pip install catalyst[cv]`." + ) + raise ex + try: import alchemy from .alchemy import AlchemyLogger except ImportError as ex: - if os.environ.get("USE_ALCHEMY", "0") == "1": + if settings.alchemy_logger_required: logger.warning( "alchemy not available, to install alchemy, " "run `pip install alchemy`." @@ -24,7 +37,7 @@ import visdom from .visdom_logger import VisdomLogger except ImportError as ex: - if os.environ.get("USE_VISDOM", "0") == "1": + if settings.visdom_logger_required: logger.warning( "visdom not available, to install visdom, " "run `pip install visdom`." @@ -35,7 +48,7 @@ import neptune from .neptune import NeptuneLogger except ImportError as ex: - if os.environ.get("USE_NEPTUNE", "0") == "1": + if settings.neptune_logger_required: logger.warning( "neptune not available, to install neptune, " "run `pip install neptune-client`." @@ -46,7 +59,7 @@ import wandb from .wandb import WandbLogger except ImportError as ex: - if os.environ.get("USE_WANDB", "0") == "1": + if settings.wandb_logger_required: logger.warning( "wandb not available, to install wandb, " "run `pip install wandb`." diff --git a/catalyst/contrib/dl/callbacks/inference.py b/catalyst/contrib/dl/callbacks/inference.py new file mode 100644 index 0000000000..0176f5daac --- /dev/null +++ b/catalyst/contrib/dl/callbacks/inference.py @@ -0,0 +1,126 @@ +import os + +import imageio +import numpy as np +from skimage.color import label2rgb + +import torch +import torch.nn.functional as F + +from catalyst.dl import Callback, CallbackOrder, State, utils + + +class InferMaskCallback(Callback): + """@TODO: Docs. Contribution is welcome.""" + + def __init__( + self, + out_dir=None, + out_prefix=None, + input_key=None, + output_key=None, + name_key=None, + mean=None, + std=None, + threshold: float = 0.5, + mask_strength: float = 0.5, + mask_type: str = "soft", + ): + """ + Args: + @TODO: Docs. 
Contribution is welcome + """ + super().__init__(CallbackOrder.Internal) + self.out_dir = out_dir + self.out_prefix = out_prefix + self.mean = mean or np.array([0.485, 0.456, 0.406]) + self.std = std or np.array([0.229, 0.224, 0.225]) + assert input_key is not None + assert output_key is not None + self.threshold = threshold + self.mask_strength = mask_strength + self.mask_type = mask_type + self.input_key = input_key + self.output_key = output_key + self.name_key = name_key + self.counter = 0 + self._keys_from_state = ["out_dir", "out_prefix"] + + def on_stage_start(self, state: State): + """Stage start hook. + + Args: + state (State): current state + """ + for key in self._keys_from_state: + value = getattr(state, key, None) + if value is not None: + setattr(self, key, value) + # assert self.out_prefix is not None + self.out_prefix = ( + self.out_prefix if self.out_prefix is not None else "" + ) + if self.out_dir is not None: + self.out_prefix = str(self.out_dir) + "/" + str(self.out_prefix) + os.makedirs(os.path.dirname(self.out_prefix), exist_ok=True) + + def on_loader_start(self, state: State): + """Loader start hook. + + Args: + state (State): current state + """ + lm = state.loader_name + os.makedirs(f"{self.out_prefix}/{lm}/", exist_ok=True) + + def on_batch_end(self, state: State): + """Batch end hook. + + Args: + state (State): current state + """ + lm = state.loader_name + names = state.input.get(self.name_key, []) + + features = state.input[self.input_key].detach().cpu() + images = utils.tensor_to_ndimage(features) + + logits = state.output[self.output_key] + logits = ( + torch.unsqueeze_(logits, dim=1) + if len(logits.shape) < 4 + else logits + ) + + if self.mask_type == "soft": + probabilities = torch.sigmoid(logits) + else: + probabilities = F.softmax(logits, dim=1) + probabilities = probabilities.detach().cpu().numpy() + + masks = [] + for probability in probabilities: + mask = np.zeros_like(probability[0], dtype=np.int32) + for i, ch in enumerate(probability): + mask[ch >= self.threshold] = i + 1 + masks.append(mask) + + for i, (image, mask) in enumerate(zip(images, masks)): + try: + suffix = names[i] + except IndexError: + suffix = f"{self.counter:06d}" + self.counter += 1 + + mask = label2rgb(mask, bg_label=0) + + image = ( + image * (1 - self.mask_strength) + mask * self.mask_strength + ) + image = (image * 255).clip(0, 255).round().astype(np.uint8) + + filename = f"{self.out_prefix}/{lm}/{suffix}.jpg" + imageio.imwrite(filename, image) + + +__all__ = ["InferMaskCallback"] diff --git a/catalyst/contrib/dl/callbacks/optimizer.py b/catalyst/contrib/dl/callbacks/optimizer.py index 509bc5d9e7..0f971bb972 100644 --- a/catalyst/contrib/dl/callbacks/optimizer.py +++ b/catalyst/contrib/dl/callbacks/optimizer.py @@ -4,7 +4,7 @@ from torch.nn.parallel import DistributedDataParallel from catalyst.core import Callback, CallbackNode, CallbackOrder, State -from catalyst.utils.tools.typing import Model +from catalyst.tools.typing import Model class SaveModelGradsCallback(Callback): diff --git a/catalyst/contrib/dl/callbacks/telegram_logger.py b/catalyst/contrib/dl/callbacks/telegram_logger.py index 402f625712..d043c8d5d8 100644 --- a/catalyst/contrib/dl/callbacks/telegram_logger.py +++ b/catalyst/contrib/dl/callbacks/telegram_logger.py @@ -1,11 +1,11 @@ from typing import List import logging -import os from urllib.parse import quote_plus from urllib.request import Request, urlopen from catalyst import utils from catalyst.core import Callback, CallbackNode, CallbackOrder, State +from 
catalyst.tools import settings class TelegramLogger(Callback): @@ -39,10 +39,8 @@ def __init__( """ super().__init__(order=CallbackOrder.Logging, node=CallbackNode.Master) # @TODO: replace this logic with global catalyst config at ~/.catalyst - self._token = token or os.environ.get("CATALYST_TELEGRAM_TOKEN", None) - self._chat_id = chat_id or os.environ.get( - "CATALYST_TELEGRAM_CHAT_ID", None - ) + self._token = token or settings.telegram_logger_token + self._chat_id = chat_id or settings.telegram_logger_chat_id assert self._token is not None and self._chat_id is not None self._base_url = ( f"https://api.telegram.org/bot{self._token}/sendMessage" diff --git a/catalyst/contrib/dl/callbacks/tests/test_optimizer_callback.py b/catalyst/contrib/dl/callbacks/tests/test_optimizer_callback.py index a965c3e8f9..26cd7f7dfa 100644 --- a/catalyst/contrib/dl/callbacks/tests/test_optimizer_callback.py +++ b/catalyst/contrib/dl/callbacks/tests/test_optimizer_callback.py @@ -7,10 +7,9 @@ import torch.nn as nn from torch.optim import Adam from torch.utils.data import DataLoader -from torchvision.datasets import MNIST -import torchvision.transforms as transforms from catalyst.contrib import registry +from catalyst.contrib.data.dataset import MNIST, ToTensor from catalyst.contrib.dl.callbacks.optimizer import SaveModelGradsCallback from catalyst.core import ( Callback, @@ -82,7 +81,7 @@ def conv2d_size_out( def _get_loaders(*, root: str, batch_size: int = 1, num_workers: int = 1): - data_transform = transforms.ToTensor() + data_transform = ToTensor() trainset = MNIST( root=root, train=True, download=True, transform=data_transform diff --git a/catalyst/contrib/nn/modules/lama.py b/catalyst/contrib/nn/modules/lama.py index dfa7fc175c..7c049c43e8 100644 --- a/catalyst/contrib/nn/modules/lama.py +++ b/catalyst/contrib/nn/modules/lama.py @@ -1,7 +1,7 @@ import torch from torch import nn -from catalyst.utils import outer_init +from catalyst.utils.initialization import outer_init class TemporalLastPooling(nn.Module): diff --git a/catalyst/contrib/nn/schedulers/base.py b/catalyst/contrib/nn/schedulers/base.py index a0d3d8d64f..34d7185032 100644 --- a/catalyst/contrib/nn/schedulers/base.py +++ b/catalyst/contrib/nn/schedulers/base.py @@ -3,7 +3,7 @@ from torch.optim.lr_scheduler import _LRScheduler -from catalyst.utils import set_optimizer_momentum +from catalyst.utils.torch import set_optimizer_momentum class BaseScheduler(_LRScheduler, ABC): diff --git a/catalyst/contrib/nn/schedulers/onecycle.py b/catalyst/contrib/nn/schedulers/onecycle.py index 87df6a0283..4d572e9975 100644 --- a/catalyst/contrib/nn/schedulers/onecycle.py +++ b/catalyst/contrib/nn/schedulers/onecycle.py @@ -4,7 +4,7 @@ from torch.optim import Optimizer -from catalyst.utils import get_optimizer_momentum +from catalyst.utils.torch import get_optimizer_momentum from .base import BatchScheduler diff --git a/catalyst/contrib/registry.py b/catalyst/contrib/registry.py index a198f919ca..3fe90bb4bf 100644 --- a/catalyst/contrib/registry.py +++ b/catalyst/contrib/registry.py @@ -2,9 +2,9 @@ catalyst subpackage registries """ import logging -import os -from catalyst.utils.tools.registry import Registry +from catalyst.tools import settings +from catalyst.tools.registry import Registry logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ def _transforms_loader(r: Registry): r.add_from_module(t, prefix=["catalyst.", "C."]) except ImportError as ex: - if os.environ.get("USE_ALBUMENTATIONS", "0") == "1": + if settings.albumentations_required: logger.warning( 
"albumentations not available, to install albumentations, " "run `pip install albumentations`." @@ -95,8 +95,14 @@ def _model_loader(r: Registry): import segmentation_models_pytorch as smp r.add_from_module(smp, prefix="smp.") - except ImportError: - pass + except ImportError as ex: + if settings.segmentation_models_required: + logger.warning( + "segmentation_models_pytorch not available," + " to install segmentation_models_pytorch," + " run `pip install segmentation-models-pytorch`." + ) + raise ex MODELS = Registry("model") diff --git a/catalyst/contrib/utils/__init__.py b/catalyst/contrib/utils/__init__.py index 9282370e40..897ea511ce 100644 --- a/catalyst/contrib/utils/__init__.py +++ b/catalyst/contrib/utils/__init__.py @@ -6,6 +6,8 @@ logger = logging.getLogger(__name__) +from catalyst.tools import settings + from .argparse import boolean_flag from .compression import pack, pack_if_needed, unpack, unpack_if_needed from .confusion_matrix import ( @@ -13,23 +15,14 @@ calculate_confusion_matrix_from_arrays, calculate_confusion_matrix_from_tensors, ) +from .cv import * from .dataset import create_dataset, split_dataset_train_test, create_dataframe -from .image import ( - has_image_extension, - imread, - imwrite, - imsave, - mask_to_overlay_image, - mimread, - mimwrite_with_meta, - tensor_from_rgb_image, - tensor_to_ndimage, -) from .misc import ( args_are_not_none, make_tuple, pairwise, ) +from .nlp import * from .pandas import ( dataframe_to_list, folds_to_list, @@ -53,24 +46,13 @@ import plotly # noqa: F401 from .plotly import plot_tensorboard_log, plot_metrics except ImportError as ex: - if os.environ.get("USE_PLOTLY", "0") == "1": + if settings.plotly_required: logger.warning( "plotly not available, to install plotly," " run `pip install plotly`." ) raise ex -try: - import transformers # noqa: F401 - from .text import tokenize_text, process_bert_output -except ImportError as ex: - if os.environ.get("USE_TRANSFORMERS", "0") == "1": - logger.warning( - "transformers not available, to install transformers," - " run `pip install transformers`." - ) - raise ex - from .visualization import ( plot_confusion_matrix, render_figure_to_tensor, diff --git a/catalyst/contrib/utils/compression.py b/catalyst/contrib/utils/compression.py index 79ad6ee7a1..1bd565a957 100644 --- a/catalyst/contrib/utils/compression.py +++ b/catalyst/contrib/utils/compression.py @@ -1,26 +1,23 @@ import base64 import logging -import os import numpy as np from six import string_types +from catalyst.tools import settings + from .serialization import deserialize, serialize logger = logging.getLogger(__name__) -try: - import lz4.frame - - LZ4_ENABLED = True -except ImportError as ex: - if os.environ.get("USE_LZ4", "0") == "1": +if settings.use_lz4: + try: + import lz4.frame + except ImportError as ex: logger.warning( - "lz4 not available, disabling compression. " - "To install lz4, run `pip install lz4`." + "lz4 not available, to install lz4, run `pip install lz4`." ) raise ex - LZ4_ENABLED = False def is_compressed(data): @@ -30,7 +27,7 @@ def is_compressed(data): def compress(data): """@TODO: Docs. Contribution is welcome.""" - if LZ4_ENABLED: + if settings.use_lz4: data = serialize(data) data = lz4.frame.compress(data) data = base64.b64encode(data).decode("ascii") @@ -46,7 +43,7 @@ def compress_if_needed(data): def decompress(data): """@TODO: Docs. 
Contribution is welcome.""" - if LZ4_ENABLED: + if settings.use_lz4: data = base64.b64decode(data) data = lz4.frame.decompress(data) data = deserialize(data) @@ -60,7 +57,7 @@ def decompress_if_needed(data): return data -if LZ4_ENABLED: +if settings.use_lz4: pack = compress pack_if_needed = compress_if_needed unpack = decompress diff --git a/catalyst/contrib/utils/cv/__init__.py b/catalyst/contrib/utils/cv/__init__.py new file mode 100644 index 0000000000..5f3b085a14 --- /dev/null +++ b/catalyst/contrib/utils/cv/__init__.py @@ -0,0 +1,32 @@ +# flake8: noqa +# isort:skip_file + +import logging +import os + +logger = logging.getLogger(__name__) + +from catalyst.tools import settings + +try: + from catalyst.contrib.utils.cv.image import ( + has_image_extension, + imread, + imwrite, + imsave, + mask_to_overlay_image, + mimread, + mimwrite_with_meta, + ) +except ImportError as ex: + if settings.cv_required: + logger.warning( + "some of catalyst-cv dependencies not available," + " to install dependencies, run `pip install catalyst[cv]`." + ) + raise ex + +from catalyst.contrib.utils.cv.tensor import ( + tensor_from_rgb_image, + tensor_to_ndimage, +) diff --git a/catalyst/contrib/utils/image.py b/catalyst/contrib/utils/cv/image.py similarity index 69% rename from catalyst/contrib/utils/image.py rename to catalyst/contrib/utils/cv/image.py index 182422424e..00b6312e8b 100644 --- a/catalyst/contrib/utils/image.py +++ b/catalyst/contrib/utils/cv/image.py @@ -8,15 +8,11 @@ import numpy as np from skimage.color import label2rgb, rgb2gray -import torch - -_IMAGENET_STD = (0.229, 0.224, 0.225) -_IMAGENET_MEAN = (0.485, 0.456, 0.406) +from catalyst.tools import settings logger = logging.getLogger(__name__) -JPEG4PY_ENABLED = False -if os.environ.get("FORCE_JPEG_TURBO", False): +if settings.use_libjpeg_turbo: try: import jpeg4py as jpeg @@ -26,17 +22,18 @@ imageio.imwrite(fp.name, img) img = jpeg.JPEG(fp.name).decode() - JPEG4PY_ENABLED = True - except ImportError: + except ImportError as ex: logger.warning( "jpeg4py not available. " "To install jpeg4py, run `pip install jpeg4py`." ) - except OSError: + raise ex + except OSError as ex: logger.warning( "libjpeg-turbo not available. " "To install libjpeg-turbo, run `apt-get install libturbojpeg`." ) + raise ex def imread( @@ -66,7 +63,9 @@ def imread( rootpath = str(rootpath) uri = uri if uri.startswith(rootpath) else os.path.join(rootpath, uri) - if JPEG4PY_ENABLED and uri.endswith(("jpg", "JPG", "jpeg", "JPEG")): + if settings.use_libjpeg_turbo and uri.endswith( + ("jpg", "JPG", "jpeg", "JPEG") + ): img = jpeg.JPEG(uri).decode() else: # @TODO: add tiff support, currently – jpg and png @@ -128,63 +127,6 @@ def mimwrite_with_meta(uri, ims, meta, **kwargs): writer.append_data(i) -def tensor_from_rgb_image(image: np.ndarray) -> torch.Tensor: - """@TODO: Docs. Contribution is welcome.""" - image = np.moveaxis(image, -1, 0) - image = np.ascontiguousarray(image) - image = torch.from_numpy(image) - return image - - -def tensor_to_ndimage( - images: torch.Tensor, - denormalize: bool = True, - mean: Tuple[float, float, float] = _IMAGENET_MEAN, - std: Tuple[float, float, float] = _IMAGENET_STD, - move_channels_dim: bool = True, - dtype=np.float32, -) -> np.ndarray: - """ - Convert float image(s) with standard normalization to - np.ndarray with [0..1] when dtype is np.float32 and [0..255] - when dtype is `np.uint8`. 
- - Args: - images (torch.Tensor): [B]xCxHxW float tensor - denormalize (bool): if True, multiply image(s) by std and add mean - mean (Tuple[float, float, float]): per channel mean to add - std (Tuple[float, float, float]): per channel std to multiply - move_channels_dim (bool): if True, convert tensor to [B]xHxWxC format - dtype: result ndarray dtype. Only float32 and uint8 are supported - - Returns: - [B]xHxWxC np.ndarray of dtype - """ - if denormalize: - has_batch_dim = len(images.shape) == 4 - - mean = images.new_tensor(mean).view( - *((1,) if has_batch_dim else ()), len(mean), 1, 1 - ) - std = images.new_tensor(std).view( - *((1,) if has_batch_dim else ()), len(std), 1, 1 - ) - - images = images * std + mean - - images = images.clamp(0, 1).numpy() - - if move_channels_dim: - images = np.moveaxis(images, -3, -1) - - if dtype == np.uint8: - images = (images * 255).round().astype(dtype) - else: - assert dtype == np.float32, "Only float32 and uint8 are supported" - - return images - - def mask_to_overlay_image( image: np.ndarray, masks: List[np.ndarray], @@ -240,6 +182,4 @@ def has_image_extension(uri) -> bool: "mask_to_overlay_image", "mimread", "mimwrite_with_meta", - "tensor_from_rgb_image", - "tensor_to_ndimage", ] diff --git a/catalyst/contrib/utils/cv/tensor.py b/catalyst/contrib/utils/cv/tensor.py new file mode 100644 index 0000000000..280b7d1842 --- /dev/null +++ b/catalyst/contrib/utils/cv/tensor.py @@ -0,0 +1,68 @@ +from typing import Tuple + +import numpy as np + +import torch + +_IMAGENET_STD = (0.229, 0.224, 0.225) +_IMAGENET_MEAN = (0.485, 0.456, 0.406) + + +def tensor_from_rgb_image(image: np.ndarray) -> torch.Tensor: + """@TODO: Docs. Contribution is welcome.""" + image = np.moveaxis(image, -1, 0) + image = np.ascontiguousarray(image) + image = torch.from_numpy(image) + return image + + +def tensor_to_ndimage( + images: torch.Tensor, + denormalize: bool = True, + mean: Tuple[float, float, float] = _IMAGENET_MEAN, + std: Tuple[float, float, float] = _IMAGENET_STD, + move_channels_dim: bool = True, + dtype=np.float32, +) -> np.ndarray: + """ + Convert float image(s) with standard normalization to + np.ndarray with [0..1] when dtype is np.float32 and [0..255] + when dtype is `np.uint8`. + + Args: + images (torch.Tensor): [B]xCxHxW float tensor + denormalize (bool): if True, multiply image(s) by std and add mean + mean (Tuple[float, float, float]): per channel mean to add + std (Tuple[float, float, float]): per channel std to multiply + move_channels_dim (bool): if True, convert tensor to [B]xHxWxC format + dtype: result ndarray dtype. 
Only float32 and uint8 are supported + + Returns: + [B]xHxWxC np.ndarray of dtype + """ + if denormalize: + has_batch_dim = len(images.shape) == 4 + + mean = images.new_tensor(mean).view( + *((1,) if has_batch_dim else ()), len(mean), 1, 1 + ) + std = images.new_tensor(std).view( + *((1,) if has_batch_dim else ()), len(std), 1, 1 + ) + + images = images * std + mean + + images = images.clamp(0, 1).numpy() + + if move_channels_dim: + images = np.moveaxis(images, -3, -1) + + if dtype == np.uint8: + images = (images * 255).round().astype(dtype) + else: + assert dtype == np.float32, "Only float32 and uint8 are supported" + + return images + + +__all__ = ["tensor_from_rgb_image", "tensor_to_ndimage"] diff --git a/catalyst/contrib/utils/cv/tests/__init__.py b/catalyst/contrib/utils/cv/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/catalyst/contrib/utils/tests/test_image.py b/catalyst/contrib/utils/cv/tests/test_image.py similarity index 92% rename from catalyst/contrib/utils/tests/test_image.py rename to catalyst/contrib/utils/cv/tests/test_image.py index 13bfc05488..02f2c1389c 100644 --- a/catalyst/contrib/utils/tests/test_image.py +++ b/catalyst/contrib/utils/cv/tests/test_image.py @@ -1,10 +1,10 @@ import numpy as np import torch -from torchvision.transforms.functional import normalize, to_tensor from catalyst import utils -from catalyst.contrib.utils.image import _IMAGENET_MEAN, _IMAGENET_STD +from catalyst.contrib.data.dataset.transforms import normalize, to_tensor +from catalyst.contrib.utils.cv.tensor import _IMAGENET_MEAN, _IMAGENET_STD def test_imread(): diff --git a/catalyst/contrib/utils/nlp/__init__.py b/catalyst/contrib/utils/nlp/__init__.py new file mode 100644 index 0000000000..60e379ce58 --- /dev/null +++ b/catalyst/contrib/utils/nlp/__init__.py @@ -0,0 +1,23 @@ +# flake8: noqa +# isort:skip_file + +import logging +import os + +logger = logging.getLogger(__name__) + +from catalyst.tools import settings + +try: + import transformers # noqa: F401 + from catalyst.contrib.utils.nlp.text import ( + tokenize_text, + process_bert_output, + ) +except ImportError as ex: + if settings.transformers_required: + logger.warning( + "transformers not available. " + "To install transformers, run `pip install transformers`." + ) + raise ex diff --git a/catalyst/contrib/utils/text.py b/catalyst/contrib/utils/nlp/text.py similarity index 100% rename from catalyst/contrib/utils/text.py rename to catalyst/contrib/utils/nlp/text.py diff --git a/catalyst/contrib/utils/serialization.py b/catalyst/contrib/utils/serialization.py index 20c68e8fa3..d61745ad74 100644 --- a/catalyst/contrib/utils/serialization.py +++ b/catalyst/contrib/utils/serialization.py @@ -1,21 +1,19 @@ import logging -import os import pickle -logger = logging.getLogger(__name__) +from catalyst.tools import settings -try: - import pyarrow +logger = logging.getLogger(__name__) - PYARROW_ENABLED = True -except ImportError as ex: - if os.environ.get("USE_PYARROW", "0") == "1": +if settings.use_pyarrow: + try: + import pyarrow + except ImportError as ex: logger.warning( "pyarrow not available. " "To install pyarrow, run `pip install pyarrow`."
) raise ex - PYARROW_ENABLED = False def pyarrow_serialize(data): @@ -66,7 +64,7 @@ def pickle_deserialize(data): return pickle.loads(data) -if PYARROW_ENABLED: +if settings.use_pyarrow: serialize = pyarrow_serialize deserialize = pyarrow_deserialize else: diff --git a/catalyst/contrib/utils/tests/test_pandas.py b/catalyst/contrib/utils/tests/test_pandas.py index 19af4264f0..d341700197 100644 --- a/catalyst/contrib/utils/tests/test_pandas.py +++ b/catalyst/contrib/utils/tests/test_pandas.py @@ -1,6 +1,6 @@ import pytest -from catalyst.utils import pandas +from catalyst.contrib.utils import pandas def test_folds_to_list(): diff --git a/catalyst/contrib/utils/tools/tensorboard.py b/catalyst/contrib/utils/tools/tensorboard.py index e98708b901..43b2d1dc43 100644 --- a/catalyst/contrib/utils/tools/tensorboard.py +++ b/catalyst/contrib/utils/tools/tensorboard.py @@ -15,7 +15,6 @@ os.environ["CRC32C_SW_MODE"] = "auto" from crc32c import crc32 as crc32c # noqa: E402 -import cv2 # noqa: E402 import numpy as np # noqa: E402 # Native tensorboard support from 1.2.0 version of PyTorch @@ -145,33 +144,15 @@ def _get_scalar(value) -> Optional[np.ndarray]: return None -def _get_image(value) -> Optional[np.ndarray]: - """Decode an image event. - - Args: - value: A value field of an event - - Returns: - Decoded image - """ - if value.HasField("image"): - encoded_image = value.image.encoded_image_string - buf = np.frombuffer(encoded_image, np.uint8) - data = cv2.imdecode(buf, cv2.IMREAD_COLOR) - return data - return None - - class SummaryReader(Iterable): """Iterates over events in all the files in the current logdir. .. note:: - Only scalars and images are supported at the moment. + Only scalars are supported at the moment. """ _DECODERS = { "scalar": _get_scalar, - "image": _get_image, } def __init__( diff --git a/catalyst/contrib/utils/tools/tests/test_tensorboard.py b/catalyst/contrib/utils/tools/tests/test_tensorboard.py index 331242aa58..3221b2faa6 100644 --- a/catalyst/contrib/utils/tools/tests/test_tensorboard.py +++ b/catalyst/contrib/utils/tools/tests/test_tensorboard.py @@ -4,7 +4,6 @@ from pathlib import Path from unittest.mock import patch -import cv2 import numpy as np import pytest @@ -23,14 +22,12 @@ def _get_test_data(): x 1.0 1 y -1.0 1 x 2.0 2 - z zeros 2×2×3 1 The first event is empty with wall_time = 1557489465 log.add_scalar("x", 1.0, global_step=1) log.add_scalar("y", -1.0, global_step=1) log.add_scalar("x", 2.0, global_step=2) - log.add_image("z", np.zeros((2, 2, 1)), global_step=1) """ data_raw = [ @@ -38,15 +35,6 @@ def _get_test_data(): {"tag": "x", "value": 1.0, "step": 1, "type": "scalar"}, {"tag": "y", "value": -1.0, "step": 1, "type": "scalar"}, {"tag": "x", "value": 2.0, "step": 2, "type": "scalar"}, - { - "tag": "z", - "value": np.zeros((2, 2, 3)), - "height": 2, - "width": 2, - "channels": 3, - "step": 1, - "type": "image", - }, ] # noqa: Q000 data = ( @@ -67,12 +55,6 @@ def _get_test_data(): return data, data_raw -def _compare_image_data(png, data): - png_buf = np.frombuffer(png, np.uint8) - png_decoded = cv2.imdecode(png_buf, cv2.IMREAD_COLOR) - assert np.all(png_decoded == data), "Corrupted image data" - - def test_events_reader_successful(): """@TODO: Docs. 
Contribution is welcome.""" data, data_raw = _get_test_data() @@ -88,15 +70,6 @@ def test_events_reader_successful(): assert ( event.summary.value[0].simple_value == event_raw["value"] ) - elif event_raw["type"] == "image": - assert event.summary.value[0].HasField("image") - assert event.summary.value[0].image.height == 2 - assert event.summary.value[0].image.width == 2 - assert event.summary.value[0].image.colorspace == 3 - _compare_image_data( - event.summary.value[0].image.encoded_image_string, - event_raw["value"], - ) def test_events_reader_empty(): @@ -141,7 +114,7 @@ def _open(path, mode): @patch("builtins.open", _open) def test_summary_reader_iterate(): """@TODO: Docs. Contribution is welcome.""" - reader = SummaryReader("logs", types=["scalar", "image"]) + reader = SummaryReader("logs", types=["scalar"]) _, data_raw = _get_test_data() data_raw2 = 2 * [d for d in data_raw if d is not None] items = list(reader) @@ -161,7 +134,7 @@ def test_summary_reader_iterate(): def test_summary_reader_filter(): """@TODO: Docs. Contribution is welcome.""" tags = ["x", "z"] - reader = SummaryReader("logs", tag_filter=tags, types=["scalar", "image"]) + reader = SummaryReader("logs", tag_filter=tags, types=["scalar"]) _, data_raw = _get_test_data() data_raw2 = 2 * [d for d in data_raw if d is not None and d["tag"] in tags] items = list(reader) diff --git a/catalyst/contrib/utils/visualization.py b/catalyst/contrib/utils/visualization.py index 79aeb87ec5..cd04b1d77e 100644 --- a/catalyst/contrib/utils/visualization.py +++ b/catalyst/contrib/utils/visualization.py @@ -2,7 +2,7 @@ import numpy as np -from .image import tensor_from_rgb_image +from .cv import tensor_from_rgb_image def plot_confusion_matrix( diff --git a/catalyst/core/callbacks/early_stop.py b/catalyst/core/callbacks/early_stop.py index 47a07f06bb..4158dcb10a 100644 --- a/catalyst/core/callbacks/early_stop.py +++ b/catalyst/core/callbacks/early_stop.py @@ -1,19 +1,10 @@ -import os - from catalyst.core import Callback, CallbackNode, CallbackOrder, State -_NUM_BATCH_STEPS = int(os.environ.get("CHECK_BATCH_STEPS", 2)) -_NUM_EPOCH_STEPS = int(os.environ.get("CHECK_EPOCH_STEPS", 2)) - class CheckRunCallback(Callback): """@TODO: Docs. Contribution is welcome.""" - def __init__( - self, - num_batch_steps: int = _NUM_BATCH_STEPS, - num_epoch_steps: int = _NUM_EPOCH_STEPS, - ): + def __init__(self, num_batch_steps: int = 3, num_epoch_steps: int = 2): """@TODO: Docs. 
Contribution is welcome.""" super().__init__(order=CallbackOrder.External, node=CallbackNode.All) self.num_batch_steps = num_batch_steps diff --git a/catalyst/core/callbacks/optimizer.py b/catalyst/core/callbacks/optimizer.py index ca5d3937e9..1c187a1bbf 100644 --- a/catalyst/core/callbacks/optimizer.py +++ b/catalyst/core/callbacks/optimizer.py @@ -10,7 +10,7 @@ State, utils, ) -from catalyst.utils.tools.typing import Optimizer +from catalyst.tools.typing import Optimizer logger = logging.getLogger(__name__) diff --git a/catalyst/core/callbacks/timer.py b/catalyst/core/callbacks/timer.py index 86bd2920b8..0c18c7ced8 100644 --- a/catalyst/core/callbacks/timer.py +++ b/catalyst/core/callbacks/timer.py @@ -1,5 +1,5 @@ from catalyst.core import Callback, CallbackNode, CallbackOrder, State -from catalyst.utils.tools.time_manager import TimeManager +from catalyst.tools.time_manager import TimeManager EPS = 1e-8 diff --git a/catalyst/core/experiment.py b/catalyst/core/experiment.py index a7193f3874..d91c6b5849 100644 --- a/catalyst/core/experiment.py +++ b/catalyst/core/experiment.py @@ -5,7 +5,7 @@ from torch import nn from torch.utils.data import DataLoader, Dataset -from catalyst.utils.tools.typing import Criterion, Model, Optimizer, Scheduler +from catalyst.tools.typing import Criterion, Model, Optimizer, Scheduler from .callback import Callback diff --git a/catalyst/core/registry.py b/catalyst/core/registry.py index c79e3a99ab..f9d39e7a54 100644 --- a/catalyst/core/registry.py +++ b/catalyst/core/registry.py @@ -15,7 +15,7 @@ Transform, TRANSFORMS, ) -from catalyst.utils.tools.registry import Registry +from catalyst.tools.registry import Registry def _callbacks_loader(r: Registry): diff --git a/catalyst/core/runner.py b/catalyst/core/runner.py index e50ce6f27d..267429ea58 100644 --- a/catalyst/core/runner.py +++ b/catalyst/core/runner.py @@ -7,12 +7,8 @@ from torch.utils.data import DataLoader, DistributedSampler from catalyst.core import utils -from catalyst.utils.tools.settings import ( - LOADER_INFER_PREFIX, - LOADER_TRAIN_PREFIX, - LOADER_VALID_PREFIX, -) -from catalyst.utils.tools.typing import ( +from catalyst.tools import settings +from catalyst.tools.typing import ( Criterion, Device, Model, @@ -430,15 +426,22 @@ def _run_epoch(self, stage: str, epoch: int) -> None: else: # @TODO: add check for non distributed run for inference assert not any( - x.startswith(LOADER_TRAIN_PREFIX) for x in loaders.keys() + x.startswith(settings.loader_train_prefix) + for x in loaders.keys() ), "for inference no train loader should be passed" for loader_name, loader in loaders.items(): state.loader_name = loader_name state.loader_len = len(loader) - state.is_train_loader = loader_name.startswith(LOADER_TRAIN_PREFIX) - state.is_valid_loader = loader_name.startswith(LOADER_VALID_PREFIX) - state.is_infer_loader = loader_name.startswith(LOADER_INFER_PREFIX) + state.is_train_loader = loader_name.startswith( + settings.loader_train_prefix + ) + state.is_valid_loader = loader_name.startswith( + settings.loader_valid_prefix + ) + state.is_infer_loader = loader_name.startswith( + settings.loader_infer_prefix + ) utils.maybe_recursive_call( self.model, "train", mode=state.is_train_loader, ) diff --git a/catalyst/core/state.py b/catalyst/core/state.py index 6016c1f487..dd39848f47 100644 --- a/catalyst/core/state.py +++ b/catalyst/core/state.py @@ -6,13 +6,9 @@ from torch.utils.data import DataLoader from catalyst.core import utils -from catalyst.utils.tools.frozen_class import FrozenClass -from 
catalyst.utils.tools.settings import ( - LOADER_VALID_PREFIX, - STAGE_INFER_PREFIX, - STATE_MAIN_METRIC, -) -from catalyst.utils.tools.typing import ( +from catalyst.tools import settings +from catalyst.tools.frozen_class import FrozenClass +from catalyst.tools.typing import ( Criterion, Device, Model, @@ -288,11 +284,11 @@ def __init__( scheduler: StateScheduler = None, callbacks: Dict[str, "Callback"] = None, logdir: str = None, - stage: str = STAGE_INFER_PREFIX, + stage: str = settings.stage_infer_prefix, # @TODO: wtf? num_epochs: int = 1, - main_metric: str = STATE_MAIN_METRIC, + main_metric: str = "loss", minimize_metric: bool = True, - valid_loader: str = LOADER_VALID_PREFIX, + valid_loader: str = settings.loader_valid_prefix, checkpoint_data: Dict = None, is_check_run: bool = False, **kwargs, @@ -374,7 +370,7 @@ def __init__( self.is_valid_loader: bool = False self.is_infer_loader: bool = False self.is_infer_stage: bool = self.stage_name.startswith( - STAGE_INFER_PREFIX + settings.stage_infer_prefix ) self.need_early_stop: bool = False self.need_exception_reraise: bool = True diff --git a/catalyst/data/__init__.py b/catalyst/data/__init__.py index ee4a6a6d2c..a1d46a718f 100644 --- a/catalyst/data/__init__.py +++ b/catalyst/data/__init__.py @@ -9,14 +9,7 @@ NumpyDataset, PathsDataset, ) -from .reader import ( - ImageReader, - LambdaReader, - MaskReader, - ReaderCompose, - ReaderSpec, - ScalarReader, -) +from .reader import LambdaReader, ReaderCompose, ReaderSpec, ScalarReader from .sampler import ( BalanceClassSampler, DistributedSamplerWrapper, diff --git a/catalyst/data/__main__.py b/catalyst/data/__main__.py index 9fbf4fdac8..be791d9df8 100644 --- a/catalyst/data/__main__.py +++ b/catalyst/data/__main__.py @@ -73,36 +73,43 @@ from argparse import ArgumentParser, RawTextHelpFormatter from collections import OrderedDict import logging -import os from catalyst.__version__ import __version__ -from catalyst.data.scripts import ( - image2embedding, - process_images, - project_embeddings, - split_dataframe, - tag2label, -) +from catalyst.data.scripts import split_dataframe, tag2label +from catalyst.tools import settings logger = logging.getLogger(__name__) COMMANDS = OrderedDict( - [ - ("tag2label", tag2label), - ("process-images", process_images), - ("split-dataframe", split_dataframe), - ("image2embedding", image2embedding), - ("project-embeddings", project_embeddings), - ] + [("tag2label", tag2label), ("split-dataframe", split_dataframe)] ) +try: + import imageio # noqa: F401 + from catalyst.data.scripts import ( + image2embedding, + process_images, + project_embeddings, + ) + + COMMANDS["process-images"] = process_images + COMMANDS["image2embedding"] = image2embedding + COMMANDS["project-embeddings"] = project_embeddings +except ImportError as ex: + if settings.cv_required: + logger.warning( + "some of the catalyst-cv dependencies are not available. " + " To install them, run `pip install catalyst[cv]`." + ) + raise ex + try: import transformers # noqa: F401 from catalyst.data.scripts import text2embedding COMMANDS["text2embedding"] = text2embedding except ImportError as ex: - if os.environ.get("USE_TRANSFORMERS", "0") == "1": +if settings.transformers_required: logger.warning( "transformers not available. " "To install transformers, run `pip install transformers`."
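[Review note] The hunks above all repeat the pattern at the heart of this patch: an optional extra is imported inside try/except, the ImportError is tolerated by default, and it is escalated (warning plus re-raise) only when the corresponding `settings.*_required` flag says the extra must be present. Below is a minimal, self-contained sketch of that pattern; `_Settings` is a hard-coded stand-in for `catalyst.tools.settings` (which actually reads the flags from setup.cfg/.catalyst via MergedConfigParser), and `imageio` plays the role of one of the catalyst[cv] extras.

import logging

logger = logging.getLogger(__name__)


class _Settings:
    # Stand-in for catalyst.tools.settings; hard-coded for illustration.
    cv_required: bool = False


settings = _Settings()

try:
    import imageio  # noqa: F401  # one of the optional catalyst[cv] extras
except ImportError as ex:
    if settings.cv_required:
        # The extra is declared required: warn loudly and fail fast.
        logger.warning(
            "some of the catalyst-cv dependencies are not available. "
            "To install them, run `pip install catalyst[cv]`."
        )
        raise ex
    # Otherwise the error is swallowed and the CV entry points are
    # simply not registered.

The effect is that `pip install catalyst` stays slim, while a config flag such as `cv_required = true` turns a silently missing extra into an immediate, actionable failure.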
diff --git a/catalyst/data/reader.py b/catalyst/data/reader.py index d3567bd058..1511f40a24 100644 --- a/catalyst/data/reader.py +++ b/catalyst/data/reader.py @@ -1,9 +1,9 @@ -from typing import Callable, List, Tuple, Type, Union +from typing import Callable, List, Type import functools import numpy as np -from catalyst.utils import get_one_hot, imread, mimread +from catalyst.utils import get_one_hot class ReaderSpec: @@ -41,89 +41,6 @@ def __call__(self, element): ) -class ImageReader(ReaderSpec): - """Image reader abstraction. Reads images from a ``csv`` dataset.""" - - def __init__( - self, - input_key: str, - output_key: str, - rootpath: str = None, - grayscale: bool = False, - ): - """ - Args: - input_key (str): key to use from annotation dict - output_key (str): key to use to store the result - rootpath (str): path to images dataset root directory - (so your can use relative paths in annotations) - grayscale (bool): flag if you need to work only - with grayscale images - """ - super().__init__(input_key, output_key) - self.rootpath = rootpath - self.grayscale = grayscale - - def __call__(self, element): - """Reads a row from your annotations dict with filename and - transfer it to an image - - Args: - element: elem in your dataset - - Returns: - np.ndarray: Image - """ - image_name = str(element[self.input_key]) - img = imread( - image_name, rootpath=self.rootpath, grayscale=self.grayscale - ) - - output = {self.output_key: img} - return output - - -class MaskReader(ReaderSpec): - """Mask reader abstraction. Reads masks from a `csv` dataset.""" - - def __init__( - self, - input_key: str, - output_key: str, - rootpath: str = None, - clip_range: Tuple[Union[int, float], Union[int, float]] = (0, 1), - ): - """ - Args: - input_key (str): key to use from annotation dict - output_key (str): key to use to store the result - rootpath (str): path to images dataset root directory - (so your can use relative paths in annotations) - clip_range (Tuple[int, int]): lower and upper interval edges, - image values outside the interval are clipped - to the interval edges - """ - super().__init__(input_key, output_key) - self.rootpath = rootpath - self.clip = clip_range - - def __call__(self, element): - """Reads a row from your annotations dict with filename and - transfer it to a mask - - Args: - element: elem in your dataset. - - Returns: - np.ndarray: Mask - """ - mask_name = str(element[self.input_key]) - mask = mimread(mask_name, rootpath=self.rootpath, clip_range=self.clip) - - output = {self.output_key: mask} - return output - - class ScalarReader(ReaderSpec): """ Numeric data reader abstraction. 
@@ -256,7 +173,6 @@ def __call__(self, element): __all__ = [ "ReaderSpec", - "ImageReader", "ScalarReader", "LambdaReader", "ReaderCompose", diff --git a/catalyst/data/scripts/image2embedding.py b/catalyst/data/scripts/image2embedding.py index 6133fb7bae..02115f1b0c 100644 --- a/catalyst/data/scripts/image2embedding.py +++ b/catalyst/data/scripts/image2embedding.py @@ -1,3 +1,4 @@ +from typing import Sequence import argparse from pathlib import Path @@ -7,19 +8,43 @@ from tqdm import tqdm import torch -from torchvision import transforms +from catalyst.contrib.data.cv import ImageReader from catalyst.contrib.models.cv import ResnetEncoder -from catalyst.data import ImageReader from catalyst.dl import utils cv2.setNumThreads(0) cv2.ocl.setUseOpenCL(False) IMG_SIZE = (224, 224) -IMAGENET_NORM = transforms.Normalize( - (0.485, 0.456, 0.406), (0.229, 0.224, 0.225) -) + + +def normalize( + tensor: torch.Tensor, + mean: Sequence[float] = (0.485, 0.456, 0.406), + std: Sequence[float] = (0.229, 0.224, 0.225), +): + """Normalize a tensor image with mean and standard deviation. + + Args: + tensor (torch.Tensor): Tensor image of size (C, H, W) to be normalized + mean (Sequence[float]): Sequence of means for each channel + std (Sequence[float]): Sequence of standard deviations for each channel + + Returns: + torch.Tensor: Normalized Tensor image + """ + dtype = tensor.dtype + mean = torch.as_tensor(mean, dtype=dtype, device=tensor.device) + std = torch.as_tensor(std, dtype=dtype, device=tensor.device) + + if mean.ndim == 1: + mean = mean[:, None, None] + if std.ndim == 1: + std = std[:, None, None] + + tensor.sub_(mean).div_(std) + return tensor def dict_transformer(sample): @@ -30,7 +55,7 @@ def dict_transformer(sample): # image = np.concatenate([np.expand_dims(image, -1)] * 3, axis=-1) image = cv2.resize(image, IMG_SIZE, interpolation=cv2.INTER_NEAREST) image = torch.from_numpy(image.astype(np.float32) / 255.0).permute(2, 0, 1) - image = IMAGENET_NORM(image) + image = normalize(image) sample["image"] = image return sample diff --git a/catalyst/data/scripts/process_images.py b/catalyst/data/scripts/process_images.py index 518936a189..4d4f22db78 100644 --- a/catalyst/data/scripts/process_images.py +++ b/catalyst/data/scripts/process_images.py @@ -18,14 +18,7 @@ import cv2 import numpy as np -from catalyst.utils import ( - boolean_flag, - get_pool, - has_image_extension, - imread, - imwrite, - tqdm_parallel_imap, -) +from catalyst import utils # Limit cv2's processor usage # cv2.setNumThreads() doesn't work @@ -65,13 +58,15 @@ def build_args(parser): help="Output images size. E.g. 
224, 448", ) - boolean_flag(parser, "clear-exif", default=True, help="Clear EXIF data") + utils.boolean_flag( + parser, "clear-exif", default=True, help="Clear EXIF data" + ) - boolean_flag( + utils.boolean_flag( parser, "grayscale", default=False, help="Read images in grayscale" ) - boolean_flag( + utils.boolean_flag( parser, "expand-dims", default=True, @@ -168,7 +163,7 @@ def preprocess(self, image_path: Path): # imread does not have exifrotate for non-jpeg type kwargs["exifrotate"] = not self.clear_exif - image = np.array(imread(uri=image_path, **kwargs)) + image = np.array(utils.imread(uri=image_path, **kwargs)) except Exception as e: print(f"Cannot read file {image_path}, exception: {e}") return @@ -180,7 +175,7 @@ def preprocess(self, image_path: Path): target_path.parent.mkdir(parents=True, exist_ok=True) image = image.clip(0, 255).round().astype(np.uint8) - imwrite(target_path, image) + utils.imwrite(target_path, image) def process_all(self, pool: Pool): """@TODO: Docs. Contribution is welcome.""" @@ -191,11 +186,11 @@ def process_all(self, pool: Pool): [ root / filename for filename in files - if has_image_extension(filename) + if utils.has_image_extension(filename) ] ) - tqdm_parallel_imap(self.preprocess, images, pool) + utils.tqdm_parallel_imap(self.preprocess, images, pool) def main(args, _=None): @@ -204,7 +199,7 @@ def main(args, _=None): args.pop("command", None) num_workers = args.pop("num_workers") - with get_pool(num_workers) as p: + with utils.get_pool(num_workers) as p: Preprocessor(**args).process_all(p) diff --git a/catalyst/data/scripts/split_dataframe.py b/catalyst/data/scripts/split_dataframe.py index 1cd1713df3..052a0e05c3 100644 --- a/catalyst/data/scripts/split_dataframe.py +++ b/catalyst/data/scripts/split_dataframe.py @@ -4,7 +4,7 @@ import pandas as pd -from catalyst.utils import folds_to_list, split_dataframe +from catalyst import utils def build_args(parser): @@ -94,17 +94,17 @@ def main(args, uargs=None): dataframe = pd.read_csv(args.in_csv) train_folds = ( - folds_to_list(args.train_folds) + utils.folds_to_list(args.train_folds) if args.train_folds is not None else None ) valid_folds = ( - folds_to_list(args.valid_folds) + utils.folds_to_list(args.valid_folds) if args.valid_folds is not None else None ) infer_folds = ( - folds_to_list(args.infer_folds) + utils.folds_to_list(args.infer_folds) if args.infer_folds is not None else None ) @@ -113,7 +113,7 @@ def main(args, uargs=None): json.load(open(args.tag2class)) if args.tag2class is not None else None ) - df_all, train, valid, infer = split_dataframe( + df_all, train, valid, infer = utils.split_dataframe( dataframe, train_folds=train_folds, valid_folds=valid_folds, diff --git a/catalyst/data/scripts/tag2label.py b/catalyst/data/scripts/tag2label.py index ade8679f8e..0190e7b178 100644 --- a/catalyst/data/scripts/tag2label.py +++ b/catalyst/data/scripts/tag2label.py @@ -3,13 +3,7 @@ import pandas as pd -from catalyst.utils import ( - boolean_flag, - create_dataframe, - create_dataset, - get_dataset_labeling, - separate_tags, -) +from catalyst import utils def build_args(parser): @@ -51,7 +45,7 @@ def build_args(parser): default=None, help="Separator if you want to use several target columns", ) - boolean_flag( + utils.boolean_flag( parser, "recursive", default=False, help="Include subdirs in dataset", ) @@ -83,12 +77,14 @@ def process_fn(x): if not in_dir.endswith("/"): in_dir = f"{in_dir}/" - dataset = create_dataset( + dataset = utils.create_dataset( f"{in_dir}/**", process_fn=process_fn, 
recursive=recursive ) dfs.append( - create_dataframe(dataset, columns=[tag_column_name, "filepath"]) + utils.create_dataframe( + dataset, columns=[tag_column_name, "filepath"] + ) ) df = pd.concat(dfs).reset_index(drop=True) @@ -107,11 +103,11 @@ def main(args, _=None): raise Exception if args.tag_delim is not None: - df = separate_tags( + df = utils.separate_tags( df, tag_column=args.tag_column, tag_delim=args.tag_delim ) - tag2lbl = get_dataset_labeling(df, args.tag_column) + tag2lbl = utils.get_dataset_labeling(df, args.tag_column) print("Num classes: ", len(tag2lbl)) with open(args.out_labeling, "w") as fout: diff --git a/catalyst/data/scripts/text2embedding.py b/catalyst/data/scripts/text2embedding.py index fb2d128c2a..95579502c4 100644 --- a/catalyst/data/scripts/text2embedding.py +++ b/catalyst/data/scripts/text2embedding.py @@ -10,7 +10,7 @@ import torch from transformers import BertConfig, BertModel, BertTokenizer -from catalyst.contrib.utils.text import process_bert_output, tokenize_text +from catalyst.contrib.utils import process_bert_output, tokenize_text from catalyst.data import LambdaReader from catalyst.dl import utils diff --git a/catalyst/dl/callbacks/__init__.py b/catalyst/dl/callbacks/__init__.py index a00d7ed4c4..7ccf7741d6 100644 --- a/catalyst/dl/callbacks/__init__.py +++ b/catalyst/dl/callbacks/__init__.py @@ -4,7 +4,7 @@ from catalyst.core.callbacks import * from .confusion_matrix import ConfusionMatrixCallback -from .inference import InferCallback, InferMaskCallback +from .inference import InferCallback from .meter import MeterMetricsCallback from .metrics import ( AccuracyCallback, diff --git a/catalyst/dl/callbacks/inference.py b/catalyst/dl/callbacks/inference.py index c7e90cbc91..8f22560dd7 100644 --- a/catalyst/dl/callbacks/inference.py +++ b/catalyst/dl/callbacks/inference.py @@ -1,14 +1,9 @@ from collections import defaultdict import os -import imageio import numpy as np -from skimage.color import label2rgb -import torch -import torch.nn.functional as F - -from catalyst.dl import Callback, CallbackOrder, State, utils +from catalyst.dl import Callback, CallbackOrder, State # @TODO: refactor @@ -77,117 +72,4 @@ def on_loader_end(self, state: State): np.save(f"{self.out_prefix}/{suffix}.npy", value) -class InferMaskCallback(Callback): - """@TODO: Docs. Contribution is welcome.""" - - def __init__( - self, - out_dir=None, - out_prefix=None, - input_key=None, - output_key=None, - name_key=None, - mean=None, - std=None, - threshold: float = 0.5, - mask_strength: float = 0.5, - mask_type: str = "soft", - ): - """ - Args: - @TODO: Docs. Contribution is welcome - """ - super().__init__(CallbackOrder.Internal) - self.out_dir = out_dir - self.out_prefix = out_prefix - self.mean = mean or np.array([0.485, 0.456, 0.406]) - self.std = std or np.array([0.229, 0.224, 0.225]) - assert input_key is not None - assert output_key is not None - self.threshold = threshold - self.mask_strength = mask_strength - self.mask_type = mask_type - self.input_key = input_key - self.output_key = output_key - self.name_key = name_key - self.counter = 0 - self._keys_from_state = ["out_dir", "out_prefix"] - - def on_stage_start(self, state: State): - """Stage start hook. 
- - Args: - state (State): current state - """ - for key in self._keys_from_state: - value = getattr(state, key, None) - if value is not None: - setattr(self, key, value) - # assert self.out_prefix is not None - self.out_prefix = ( - self.out_prefix if self.out_prefix is not None else "" - ) - if self.out_dir is not None: - self.out_prefix = str(self.out_dir) + "/" + str(self.out_prefix) - os.makedirs(os.path.dirname(self.out_prefix), exist_ok=True) - - def on_loader_start(self, state: State): - """Loader start hook. - - Args: - state (State): current state - """ - lm = state.loader_name - os.makedirs(f"{self.out_prefix}/{lm}/", exist_ok=True) - - def on_batch_end(self, state: State): - """Batch end hook. - - Args: - state (State): current state - """ - lm = state.loader_name - names = state.input.get(self.name_key, []) - - features = state.input[self.input_key].detach().cpu() - images = utils.tensor_to_ndimage(features) - - logits = state.output[self.output_key] - logits = ( - torch.unsqueeze_(logits, dim=1) - if len(logits.shape) < 4 - else logits - ) - - if self.mask_type == "soft": - probabilities = torch.sigmoid(logits) - else: - probabilities = F.softmax(logits, dim=1) - probabilities = probabilities.detach().cpu().numpy() - - masks = [] - for probability in probabilities: - mask = np.zeros_like(probability[0], dtype=np.int32) - for i, ch in enumerate(probability): - mask[ch >= self.threshold] = i + 1 - masks.append(mask) - - for i, (image, mask) in enumerate(zip(images, masks)): - try: - suffix = names[i] - except IndexError: - suffix = f"{self.counter:06d}" - self.counter += 1 - - mask = label2rgb(mask, bg_label=0) - - image = ( - image * (1 - self.mask_strength) + mask * self.mask_strength - ) - image = (image * 255).clip(0, 255).round().astype(np.uint8) - - filename = f"{self.out_prefix}/{lm}/{suffix}.jpg" - imageio.imwrite(filename, image) - - -__all__ = ["InferCallback", "InferMaskCallback"] +__all__ = ["InferCallback"] diff --git a/catalyst/dl/experiment/config.py b/catalyst/dl/experiment/config.py index a3e18486d2..febe6a0ece 100644 --- a/catalyst/dl/experiment/config.py +++ b/catalyst/dl/experiment/config.py @@ -32,7 +32,7 @@ SCHEDULERS, TRANSFORMS, ) -from catalyst.utils.tools.typing import Criterion, Model, Optimizer, Scheduler +from catalyst.tools.typing import Criterion, Model, Optimizer, Scheduler class ConfigExperiment(_StageBasedExperiment): diff --git a/catalyst/dl/experiment/core.py b/catalyst/dl/experiment/core.py index 6d55d79ce1..686a45d2b6 100644 --- a/catalyst/dl/experiment/core.py +++ b/catalyst/dl/experiment/core.py @@ -19,8 +19,8 @@ ValidationManagerCallback, VerboseLogger, ) -from catalyst.utils.tools.settings import STAGE_INFER_PREFIX -from catalyst.utils.tools.typing import Criterion, Model, Optimizer, Scheduler +from catalyst.tools import settings +from catalyst.tools.typing import Criterion, Model, Optimizer, Scheduler class Experiment(_StageBasedExperiment): @@ -156,7 +156,7 @@ def process_loaders( loaders = utils.get_loaders_from_params( initial_seed=initial_seed, **datasets, ) - if not stage.startswith(STAGE_INFER_PREFIX): # train stage + if not stage.startswith(settings.stage_infer_prefix): # train stage if len(loaders) == 1: valid_loader = list(loaders.keys())[0] warnings.warn( diff --git a/catalyst/dl/experiment/supervised.py b/catalyst/dl/experiment/supervised.py index b4c2d228d4..c92a989f7d 100644 --- a/catalyst/dl/experiment/supervised.py +++ b/catalyst/dl/experiment/supervised.py @@ -8,7 +8,7 @@ OptimizerCallback, SchedulerCallback, ) -from 
catalyst.utils.tools.typing import Criterion, Optimizer, Scheduler +from catalyst.tools.typing import Criterion, Optimizer, Scheduler from .core import Experiment diff --git a/catalyst/dl/registry.py b/catalyst/dl/registry.py index 7402705706..6dd97270b6 100644 --- a/catalyst/dl/registry.py +++ b/catalyst/dl/registry.py @@ -18,7 +18,7 @@ TRANSFORMS, ) from catalyst.core.registry import Callback, CALLBACKS -from catalyst.utils.tools.registry import Registry +from catalyst.tools.registry import Registry def _callbacks_loader(r: Registry): diff --git a/catalyst/dl/runner/core.py b/catalyst/dl/runner/core.py index 54426bbaac..c2ce39bfcb 100644 --- a/catalyst/dl/runner/core.py +++ b/catalyst/dl/runner/core.py @@ -13,7 +13,7 @@ State, ) from catalyst.dl import Experiment, utils -from catalyst.utils.tools.typing import ( +from catalyst.tools.typing import ( Criterion, Device, Model, diff --git a/catalyst/dl/runner/supervised.py b/catalyst/dl/runner/supervised.py index 730126dc99..e846e397fc 100644 --- a/catalyst/dl/runner/supervised.py +++ b/catalyst/dl/runner/supervised.py @@ -4,7 +4,7 @@ import torch from catalyst.dl import State, SupervisedExperiment -from catalyst.utils.tools.typing import Device, Model +from catalyst.tools.typing import Device, Model from .core import Runner diff --git a/catalyst/dl/scripts/trace.py b/catalyst/dl/scripts/trace.py index fb28861bd9..ed7538b70d 100644 --- a/catalyst/dl/scripts/trace.py +++ b/catalyst/dl/scripts/trace.py @@ -6,7 +6,7 @@ import torch from catalyst.dl import Experiment, utils -from catalyst.utils.tools.typing import Device +from catalyst.tools.typing import Device def trace_model_from_checkpoint( diff --git a/catalyst/dl/utils/trace.py b/catalyst/dl/utils/trace.py index 71cb2c9932..22c96a3684 100644 --- a/catalyst/dl/utils/trace.py +++ b/catalyst/dl/utils/trace.py @@ -6,12 +6,12 @@ from torch import nn from torch.jit import ScriptModule +from catalyst.tools.typing import Device, Model from catalyst.utils import ( assert_fp16_available, get_fn_argsnames, set_requires_grad, ) -from catalyst.utils.tools.typing import Device, Model if TYPE_CHECKING: from catalyst.dl import Runner # noqa: F401 diff --git a/catalyst/utils/tools/__init__.py b/catalyst/tools/__init__.py similarity index 81% rename from catalyst/utils/tools/__init__.py rename to catalyst/tools/__init__.py index 74afcb981a..f489fdd5bb 100644 --- a/catalyst/utils/tools/__init__.py +++ b/catalyst/tools/__init__.py @@ -1,4 +1,5 @@ # flake8: noqa from .frozen_class import FrozenClass from .registry import Registry, RegistryException +from .settings import settings from .time_manager import TimeManager diff --git a/catalyst/utils/tools/frozen_class.py b/catalyst/tools/frozen_class.py similarity index 100% rename from catalyst/utils/tools/frozen_class.py rename to catalyst/tools/frozen_class.py diff --git a/catalyst/utils/tools/registry.py b/catalyst/tools/registry.py similarity index 100% rename from catalyst/utils/tools/registry.py rename to catalyst/tools/registry.py diff --git a/catalyst/tools/settings.py b/catalyst/tools/settings.py new file mode 100644 index 0000000000..d1618736c0 --- /dev/null +++ b/catalyst/tools/settings.py @@ -0,0 +1,280 @@ +from typing import Any, Dict, List, Optional, Tuple +import configparser +import logging +import os + +from catalyst.tools.frozen_class import FrozenClass + +logger = logging.getLogger(__name__) + + +class Settings(FrozenClass): + def __init__( + self, + contrib_required: bool = False, + cv_required: bool = False, + ml_required: bool = False, + 
nlp_required: bool = False, + alchemy_logger_required: Optional[bool] = None, + neptune_logger_required: Optional[bool] = None, + visdom_logger_required: Optional[bool] = None, + wandb_logger_required: Optional[bool] = None, + plotly_required: Optional[bool] = None, + telegram_logger_token: Optional[str] = None, + telegram_logger_chat_id: Optional[str] = None, + use_lz4: bool = False, + use_pyarrow: bool = False, + albumentations_required: Optional[bool] = None, + segmentation_models_required: Optional[bool] = None, + use_libjpeg_turbo: bool = False, + nmslib_required: Optional[bool] = None, + transformers_required: Optional[bool] = None, + ): + # [catalyst] + self.contrib_required: bool = contrib_required + self.cv_required: bool = cv_required + self.ml_required: bool = ml_required + self.nlp_required: bool = nlp_required + + # stages + self.stage_train_prefix: str = "train" + self.stage_valid_prefix: str = "valid" + self.stage_infer_prefix: str = "infer" + + # loader + self.loader_train_prefix: str = "train" + self.loader_valid_prefix: str = "valid" + self.loader_infer_prefix: str = "infer" + + # [catalyst-contrib] + self.alchemy_logger_required: bool = self._optional_value( + alchemy_logger_required, default=contrib_required + ) + self.neptune_logger_required: bool = self._optional_value( + neptune_logger_required, default=contrib_required + ) + self.visdom_logger_required: bool = self._optional_value( + visdom_logger_required, default=contrib_required + ) + self.wandb_logger_required: bool = self._optional_value( + wandb_logger_required, default=contrib_required + ) + self.plotly_required: bool = self._optional_value( + plotly_required, default=contrib_required + ) + self.telegram_logger_token: str = telegram_logger_token + self.telegram_logger_chat_id: str = telegram_logger_chat_id + self.use_lz4: bool = use_lz4 + self.use_pyarrow: bool = use_pyarrow + + # [catalyst-cv] + self.albumentations_required: bool = self._optional_value( + albumentations_required, default=cv_required + ) + self.segmentation_models_required: bool = self._optional_value( + segmentation_models_required, default=cv_required + ) + self.use_libjpeg_turbo: bool = use_libjpeg_turbo + + # [catalyst-ml] + self.nmslib_required: bool = self._optional_value( + nmslib_required, default=ml_required + ) + + # [catalyst-nlp] + self.transformers_required: bool = self._optional_value( + transformers_required, default=nlp_required + ) + + @staticmethod + def _optional_value(value, default): + return value if value is not None else default + + def type_hint(self, key: str): + # return get_type_hints(self).get(key, None) + return type(getattr(self, key, None)) + + @staticmethod + def parse() -> "Settings": + kwargs = MergedConfigParser(ConfigFileFinder("catalyst")).parse() + return Settings(**kwargs) + + +default_settings = Settings() + + +class ConfigFileFinder: + """Encapsulate the logic for finding and reading config files. + + Main origins of inspiration: + - https://gitlab.com/pwoolvett/flake8 (MIT License) + - https://github.com/python/mypy (MIT License) + """ + + def __init__(self, program_name: str) -> None: + """Initialize object to find config files. + + Args: + program_name (str): Name of the current program (e.g., catalyst).
+ """ + # user configuration file + self.program_name = program_name + self.user_config_file = self._user_config_file(program_name) + + # list of filenames to find in the local/project directory + self.project_filenames = ("setup.cfg", "tox.ini", f".{program_name}") + + self.local_directory = os.path.abspath(os.curdir) + + @staticmethod + def _user_config_file(program_name: str) -> str: + if os.name == "nt": # if running on Windows + home_dir = os.path.expanduser("~") + config_file_basename = f".{program_name}" + else: + home_dir = os.environ.get( + "XDG_CONFIG_HOME", os.path.expanduser("~/.config") + ) + config_file_basename = program_name + + return os.path.join(home_dir, config_file_basename) + + @staticmethod + def _read_config( + *files: str, + ) -> Tuple[configparser.RawConfigParser, List[str]]: + config = configparser.RawConfigParser() + + found_files: List[str] = [] + for filename in files: + try: + found_files.extend(config.read(filename)) + except UnicodeDecodeError: + logger.exception( + f"There was an error decoding a config file." + f" The file with a problem was {filename}." + ) + except configparser.ParsingError: + logger.exception( + f"There was an error trying to parse a config file." + f" The file with a problem was {filename}." + ) + + return config, found_files + + def local_config_files(self) -> List[str]: # noqa: D202 + """ + Find all local config files which actually exist. + + Returns: + List[str]: List of files that exist that are + local project config files with extra config files + appended to that list (which also exist). + """ + + def generate_possible_local_files(): + """Find and generate all local config files.""" + parent = tail = os.getcwd() + found_config_files = False + while tail and not found_config_files: + for project_filename in self.project_filenames: + filename = os.path.abspath( + os.path.join(parent, project_filename) + ) + if os.path.exists(filename): + yield filename + found_config_files = True + self.local_directory = parent + (parent, tail) = os.path.split(parent) + + return list(generate_possible_local_files()) + + def local_configs(self): + """Parse all local config files into one config object.""" + config, found_files = self._read_config(*self.local_config_files()) + if found_files: + logger.debug(f"Found local configuration files: {found_files}") + return config + + def user_config(self): + """Parse the user config file into a config object.""" + config, found_files = self._read_config(self.user_config_file) + if found_files: + logger.debug(f"Found user configuration files: {found_files}") + return config + + +class MergedConfigParser: + """Encapsulate merging different types of configuration files. + + This parses out the options registered that were specified in the + configuration files, handles extra configuration files, and returns + dictionaries with the parsed values. + + Main origins of inspiration: + - https://gitlab.com/pwoolvett/flake8 (MIT License) + - https://github.com/python/mypy (MIT License) + """ + + #: Set of actions that should use the + #: :meth:`~configparser.RawConfigParser.getbool` method. + GETBOOL_ACTIONS = {"store_true", "store_false"} + + def __init__(self, config_finder: ConfigFileFinder): + """Initialize the MergedConfigParser instance. + + Args: + config_finder (ConfigFileFinder): Initialized ConfigFileFinder. 
+ """ + self.program_name = config_finder.program_name + self.config_finder = config_finder + + def _normalize_value(self, option, value): + final_value = option.normalize( + value, self.config_finder.local_directory + ) + logger.debug( + f"{value} has been normalized to {final_value}" + f" for option '{option.config_name}'", + ) + return final_value + + def _parse_config(self, config_parser): + type2method = { + bool: config_parser.getboolean, + int: config_parser.getint, + } + + config_dict: Dict[str, Any] = {} + if config_parser.has_section(self.program_name): + for option_name in config_parser.options(self.program_name): + type_ = default_settings.type_hint(option_name) + method = type2method.get(type_, config_parser.get) + config_dict[option_name] = method( + self.program_name, option_name + ) + + return config_dict + + def parse(self) -> dict: + """Parse and return the local and user config files. + + First this copies over the parsed local configuration and then + iterates over the options in the user configuration and sets them if + they were not set by the local configuration file. + + Returns: + dict: Dictionary of the parsed and merged configuration options. + """ + user_config = self._parse_config(self.config_finder.user_config()) + config = self._parse_config(self.config_finder.local_configs()) + + for option, value in user_config.items(): + config.setdefault(option, value) + + return config + + +settings = Settings.parse() + +__all__ = ["settings"] diff --git a/catalyst/utils/tools/time_manager.py b/catalyst/tools/time_manager.py similarity index 100% rename from catalyst/utils/tools/time_manager.py rename to catalyst/tools/time_manager.py diff --git a/catalyst/utils/tools/typing.py b/catalyst/tools/typing.py similarity index 100% rename from catalyst/utils/tools/typing.py rename to catalyst/tools/typing.py diff --git a/catalyst/utils/distributed.py b/catalyst/utils/distributed.py index fb8a594d60..35e2776f04 100644 --- a/catalyst/utils/distributed.py +++ b/catalyst/utils/distributed.py @@ -14,7 +14,7 @@ import torch.distributed from catalyst import __version__ -from catalyst.utils.tools.typing import ( +from catalyst.tools.typing import ( Criterion, Device, Model, diff --git a/catalyst/utils/metrics/accuracy.py b/catalyst/utils/metrics/accuracy.py index 98a50a7524..8b17e82270 100644 --- a/catalyst/utils/metrics/accuracy.py +++ b/catalyst/utils/metrics/accuracy.py @@ -6,7 +6,7 @@ """ import numpy as np -from catalyst.utils import get_activation_fn +from catalyst.utils.torch import get_activation_fn def accuracy( diff --git a/catalyst/utils/metrics/dice.py b/catalyst/utils/metrics/dice.py index e6b16f9dc5..8def5cf883 100644 --- a/catalyst/utils/metrics/dice.py +++ b/catalyst/utils/metrics/dice.py @@ -3,7 +3,7 @@ """ import torch -from catalyst.utils import get_activation_fn +from catalyst.utils.torch import get_activation_fn def dice( diff --git a/catalyst/utils/metrics/f1_score.py b/catalyst/utils/metrics/f1_score.py index 7abde043ce..bef50ada55 100644 --- a/catalyst/utils/metrics/f1_score.py +++ b/catalyst/utils/metrics/f1_score.py @@ -3,7 +3,7 @@ """ import torch -from catalyst.utils import get_activation_fn +from catalyst.utils.torch import get_activation_fn def f1_score( diff --git a/catalyst/utils/metrics/iou.py b/catalyst/utils/metrics/iou.py index a061e42fac..28d33579a7 100644 --- a/catalyst/utils/metrics/iou.py +++ b/catalyst/utils/metrics/iou.py @@ -7,7 +7,7 @@ import torch -from catalyst.utils import get_activation_fn +from catalyst.utils.torch import 
get_activation_fn def iou( diff --git a/catalyst/utils/tests/test_registry.py b/catalyst/utils/tests/test_registry.py index 521eeaaea2..4433e5420d 100644 --- a/catalyst/utils/tests/test_registry.py +++ b/catalyst/utils/tests/test_registry.py @@ -1,6 +1,6 @@ import pytest -from catalyst.utils.tools.registry import Registry, RegistryException +from catalyst.tools.registry import Registry, RegistryException from . import registery_foo as module from .registery_foo import foo diff --git a/catalyst/utils/tools/settings.py b/catalyst/utils/tools/settings.py deleted file mode 100644 index 5b4dbf071c..0000000000 --- a/catalyst/utils/tools/settings.py +++ /dev/null @@ -1,11 +0,0 @@ -STATE_MAIN_METRIC = "loss" - -# stages -STAGE_INFER_PREFIX = "infer" - -# loader -LOADER_TRAIN_PREFIX = "train" -LOADER_VALID_PREFIX = "valid" -LOADER_INFER_PREFIX = "infer" - -# callbacks diff --git a/catalyst/utils/torch.py b/catalyst/utils/torch.py index d6af4e4cdc..8617179e79 100644 --- a/catalyst/utils/torch.py +++ b/catalyst/utils/torch.py @@ -10,7 +10,7 @@ import torch.backends from torch.backends import cudnn -from catalyst.utils.tools.typing import Device, Model, Optimizer +from catalyst.tools.typing import Device, Model, Optimizer from .dict import merge_dicts diff --git a/docs/api/contrib.rst b/docs/api/contrib.rst index ae51f15ac8..8ab001d656 100644 --- a/docs/api/contrib.rst +++ b/docs/api/contrib.rst @@ -260,13 +260,6 @@ Serialization :undoc-members: :show-inheritance: -Text -~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: catalyst.contrib.utils.text - :members: - :undoc-members: - :show-inheritance: - Visualization ~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: catalyst.contrib.utils.visualization @@ -282,4 +275,4 @@ Tensorboard .. automodule:: catalyst.contrib.utils.tools.tensorboard :members: :undoc-members: - :show-inheritance: \ No newline at end of file + :show-inheritance: diff --git a/docs/api/utils.rst b/docs/api/utils.rst index df551ab147..62675475fb 100644 --- a/docs/api/utils.rst +++ b/docs/api/utils.rst @@ -103,28 +103,28 @@ Tools Frozen Class ~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: catalyst.utils.tools.frozen_class +.. automodule:: catalyst.tools.frozen_class :members: :undoc-members: :show-inheritance: Registry ~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: catalyst.utils.tools.registry +.. automodule:: catalyst.tools.registry :members: :undoc-members: :show-inheritance: Time Manager ~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: catalyst.utils.tools.time_manager +.. automodule:: catalyst.tools.time_manager :members: :undoc-members: :show-inheritance: Typing ~~~~~~~~~~~~~~~~~~~~~~ -.. automodule:: catalyst.utils.tools.typing +.. automodule:: catalyst.tools.typing :members: :undoc-members: :show-inheritance: diff --git a/docs/info/distributed.rst b/docs/info/distributed.rst index b02d4aa211..eb78e06675 100644 --- a/docs/info/distributed.rst +++ b/docs/info/distributed.rst @@ -53,7 +53,7 @@ For instance, here is a minimal script that trains a linear regression model. `Link to the projector script.`_ -.. _Link to the projector script.: https://github.com/catalyst-team/catalyst/blob/master/tests/_tests_scripts/z_docs_distributed_0.py +.. _Link to the projector script.: https://github.com/catalyst-team/catalyst/blob/master/tests/_tests_scripts/dl_z_docs_distributed_0.py Stage 1 - I just want distributed ------------------------------------------------ @@ -97,7 +97,7 @@ you can just pass ``distributed=True`` to ``.train`` call `Link to the stage-1 script.`_ -.. 
_Link to the stage-1 script.: https://github.com/catalyst-team/catalyst/blob/master/tests/_tests_scripts/z_docs_distributed_1.py +.. _Link to the stage-1 script.: https://github.com/catalyst-team/catalyst/blob/master/tests/_tests_scripts/dl_z_docs_distributed_1.py In this way Catalyst will try to automatically make your loaders work in distributed setup @@ -152,7 +152,7 @@ Let's make it more reusable: `Link to the stage-2 script.`_ -.. _Link to the stage-2 script.: https://github.com/catalyst-team/catalyst/blob/master/tests/_tests_scripts/z_docs_distributed_2.py +.. _Link to the stage-2 script.: https://github.com/catalyst-team/catalyst/blob/master/tests/_tests_scripts/dl_z_docs_distributed_2.py By this way we easily can transfer your datasets to distributed mode. But again, you recreate your dataset with each worker. Can we make it better? @@ -205,7 +205,7 @@ Yup, check this one, distributed training like a pro: `Link to the stage-3 script.`_ -.. _Link to the stage-3 script.: https://github.com/catalyst-team/catalyst/blob/master/tests/_tests_scripts/z_docs_distributed_3.py +.. _Link to the stage-3 script.: https://github.com/catalyst-team/catalyst/blob/master/tests/_tests_scripts/dl_z_docs_distributed_3.py Advantages, - you have control about what is going on with manual call of diff --git a/requirements/requirements-contrib.txt b/requirements/requirements-contrib.txt index 9ff58a1c52..c592d1b14a 100644 --- a/requirements/requirements-contrib.txt +++ b/requirements/requirements-contrib.txt @@ -1,3 +1,4 @@ # plotly>=4.1.0 neptune-client +visdom wandb diff --git a/requirements/requirements-cv.txt b/requirements/requirements-cv.txt index 7eb99c0b0a..727b40b1ae 100644 --- a/requirements/requirements-cv.txt +++ b/requirements/requirements-cv.txt @@ -1,2 +1,6 @@ albumentations==0.4.3 -segmentation-models-pytorch==0.1.0 \ No newline at end of file +imageio +opencv-python-headless +scikit-image>=0.14.2 +segmentation-models-pytorch==0.1.0 +torchvision>=0.3.0 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index e347a7c861..080f4ca866 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -17,13 +17,6 @@ tensorboardX crc32c>=1.7 plotly>=4.1.0 -# cv -> catalyst[cv] -torchvision>=0.3.0 -Pillow -imageio -opencv-python -scikit-image>=0.14.2 - # Used in scripts -> contrib (?) matplotlib pandas>=0.22 diff --git a/tests/_tests_cv_classification/experiment.py b/tests/_tests_cv_classification/experiment.py index ffa1b5712c..3171801cfb 100644 --- a/tests/_tests_cv_classification/experiment.py +++ b/tests/_tests_cv_classification/experiment.py @@ -1,8 +1,6 @@ from collections import OrderedDict -import torchvision -from torchvision import transforms - +from catalyst.contrib.data.dataset import Compose, MNIST, Normalize, ToTensor from catalyst.dl import ConfigExperiment @@ -16,9 +14,7 @@ def get_transforms(stage: str = None, mode: str = None): """ @TODO: Docs. 
Contribution is welcome """ - return transforms.Compose( - [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] - ) + return Compose([ToTensor(), Normalize((0.1307,), (0.3081,))]) def get_datasets(self, stage: str, **kwargs): """ @@ -27,13 +23,13 @@ def get_datasets(self, stage: str, **kwargs): datasets = OrderedDict() if stage != "infer": - trainset = torchvision.datasets.MNIST( + trainset = MNIST( "./data", train=False, download=True, transform=Experiment.get_transforms(stage=stage, mode="train"), ) - testset = torchvision.datasets.MNIST( + testset = MNIST( "./data", train=False, download=True, @@ -43,7 +39,7 @@ def get_datasets(self, stage: str, **kwargs): datasets["train"] = trainset datasets["valid"] = testset else: - testset = torchvision.datasets.MNIST( + testset = MNIST( "./data", train=False, download=True, diff --git a/tests/_tests_cv_classification_experiment_registry/test1/experiment.py b/tests/_tests_cv_classification_experiment_registry/test1/experiment.py index dd1088291e..2deacc7950 100644 --- a/tests/_tests_cv_classification_experiment_registry/test1/experiment.py +++ b/tests/_tests_cv_classification_experiment_registry/test1/experiment.py @@ -1,8 +1,6 @@ from collections import OrderedDict -import torchvision -from torchvision import transforms - +from catalyst.contrib.data.dataset import Compose, MNIST, Normalize, ToTensor from catalyst.dl import ConfigExperiment @@ -16,9 +14,7 @@ def get_transforms(stage: str = None, mode: str = None): """ @TODO: Docs. Contribution is welcome """ - return transforms.Compose( - [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] - ) + return Compose([ToTensor(), Normalize((0.1307,), (0.3081,))]) def get_datasets(self, stage: str, **kwargs): """ @@ -27,7 +23,7 @@ def get_datasets(self, stage: str, **kwargs): datasets = OrderedDict() if stage != "infer": - trainset = torchvision.datasets.MNIST( + trainset = MNIST( "./data", train=False, download=True, @@ -35,7 +31,7 @@ def get_datasets(self, stage: str, **kwargs): stage=stage, mode="train" ), ) - testset = torchvision.datasets.MNIST( + testset = MNIST( "./data", train=False, download=True, @@ -47,7 +43,7 @@ def get_datasets(self, stage: str, **kwargs): datasets["train"] = trainset datasets["valid"] = testset else: - testset = torchvision.datasets.MNIST( + testset = MNIST( "./data", train=False, download=True, diff --git a/tests/_tests_cv_classification_experiment_registry/test2/experiments/SimpleExperiment1.py b/tests/_tests_cv_classification_experiment_registry/test2/experiments/SimpleExperiment1.py index 9c96996773..98f46e7196 100644 --- a/tests/_tests_cv_classification_experiment_registry/test2/experiments/SimpleExperiment1.py +++ b/tests/_tests_cv_classification_experiment_registry/test2/experiments/SimpleExperiment1.py @@ -1,8 +1,6 @@ from collections import OrderedDict -import torchvision -from torchvision import transforms - +from catalyst.contrib.data.dataset import Compose, MNIST, Normalize, ToTensor from catalyst.dl import ConfigExperiment @@ -16,9 +14,7 @@ def get_transforms(stage: str = None, mode: str = None): """ @TODO: Docs. 
         Contribution is welcome
         """
-        return transforms.Compose(
-            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
-        )
+        return Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

     def get_datasets(self, stage: str, **kwargs):
         """
@@ -27,7 +23,7 @@ def get_datasets(self, stage: str, **kwargs):
         datasets = OrderedDict()

         if stage != "infer":
-            trainset = torchvision.datasets.MNIST(
+            trainset = MNIST(
                 "./data",
                 train=False,
                 download=True,
@@ -35,7 +31,7 @@ def get_datasets(self, stage: str, **kwargs):
                     stage=stage, mode="train"
                 ),
             )
-            testset = torchvision.datasets.MNIST(
+            testset = MNIST(
                 "./data",
                 train=False,
                 download=True,
@@ -47,7 +43,7 @@ def get_datasets(self, stage: str, **kwargs):
             datasets["train"] = trainset
             datasets["valid"] = testset
         else:
-            testset = torchvision.datasets.MNIST(
+            testset = MNIST(
                 "./data",
                 train=False,
                 download=True,
diff --git a/tests/_tests_cv_classification_experiment_registry/test2/experiments/SimpleExperiment2.py b/tests/_tests_cv_classification_experiment_registry/test2/experiments/SimpleExperiment2.py
index 28075c9f3b..fe16d659fa 100644
--- a/tests/_tests_cv_classification_experiment_registry/test2/experiments/SimpleExperiment2.py
+++ b/tests/_tests_cv_classification_experiment_registry/test2/experiments/SimpleExperiment2.py
@@ -1,8 +1,6 @@
 from collections import OrderedDict

-import torchvision
-from torchvision import transforms
-
+from catalyst.contrib.data.dataset import Compose, MNIST, Normalize, ToTensor
 from catalyst.dl import ConfigExperiment


@@ -16,9 +14,7 @@ def get_transforms(stage: str = None, mode: str = None):
         """
         @TODO: Docs. Contribution is welcome
         """
-        return transforms.Compose(
-            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
-        )
+        return Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

     def get_datasets(self, stage: str, **kwargs):
         """
@@ -27,7 +23,7 @@ def get_datasets(self, stage: str, **kwargs):
         datasets = OrderedDict()

         if stage != "infer":
-            trainset = torchvision.datasets.MNIST(
+            trainset = MNIST(
                 "./data",
                 train=False,
                 download=True,
@@ -35,7 +31,7 @@ def get_datasets(self, stage: str, **kwargs):
                     stage=stage, mode="train"
                 ),
             )
-            testset = torchvision.datasets.MNIST(
+            testset = MNIST(
                 "./data",
                 train=False,
                 download=True,
@@ -47,7 +43,7 @@ def get_datasets(self, stage: str, **kwargs):
             datasets["train"] = trainset
             datasets["valid"] = testset
         else:
-            testset = torchvision.datasets.MNIST(
+            testset = MNIST(
                 "./data",
                 train=False,
                 download=True,
diff --git a/tests/_tests_cv_classification_transforms/experiment.py b/tests/_tests_cv_classification_transforms/experiment.py
index 214d47b1da..8f83cf3657 100644
--- a/tests/_tests_cv_classification_transforms/experiment.py
+++ b/tests/_tests_cv_classification_transforms/experiment.py
@@ -1,12 +1,11 @@
 from typing import Tuple
 from collections import OrderedDict

-import torchvision
-
+from catalyst.contrib.data.dataset import MNIST as _MNIST
 from catalyst.dl.experiment import ConfigExperiment


-class MNIST(torchvision.datasets.MNIST):
+class MNIST(_MNIST):
     """`MNIST <http://yann.lecun.com/exdb/mnist/>`_ Dataset."""

     def __getitem__(self, index: int) -> Tuple:
diff --git a/tests/_tests_dl_callbacks/experiment.py b/tests/_tests_dl_callbacks/experiment.py
index 31d648120a..a71591daa2 100644
--- a/tests/_tests_dl_callbacks/experiment.py
+++ b/tests/_tests_dl_callbacks/experiment.py
@@ -1,9 +1,8 @@
 from collections import OrderedDict

 from torch.utils.data import Subset
-import torchvision
-from torchvision import transforms

+from catalyst.contrib.data.dataset import Compose, MNIST, Normalize, ToTensor
 from catalyst.dl import ConfigExperiment


@@ -17,9 +16,7 @@ def get_transforms(stage: str = None, mode: str = None):
         """
         @TODO: Docs. Contribution is welcome
         """
-        return transforms.Compose(
-            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
-        )
+        return Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

     def get_datasets(self, stage: str, n_samples: int = 320, **kwargs):
         """
@@ -28,13 +25,13 @@ def get_datasets(self, stage: str, n_samples: int = 320, **kwargs):
         datasets = OrderedDict()

         if stage != "infer":
-            trainset = torchvision.datasets.MNIST(
+            trainset = MNIST(
                 "./data",
                 train=False,
                 download=True,
                 transform=Experiment.get_transforms(stage=stage, mode="train"),
             )
-            testset = torchvision.datasets.MNIST(
+            testset = MNIST(
                 "./data",
                 train=False,
                 download=True,
@@ -46,7 +43,7 @@ def get_datasets(self, stage: str, n_samples: int = 320, **kwargs):
             datasets["train"] = trainset
             datasets["valid"] = testset
         else:
-            testset = torchvision.datasets.MNIST(
+            testset = MNIST(
                 "./data",
                 train=False,
                 download=True,
diff --git a/tests/_tests_scripts/z_segmentation.py b/tests/_tests_scripts/cv_z_segmentation.py
similarity index 90%
rename from tests/_tests_scripts/z_segmentation.py
rename to tests/_tests_scripts/cv_z_segmentation.py
index cdd0a278de..09a797cdae 100644
--- a/tests/_tests_scripts/z_segmentation.py
+++ b/tests/_tests_scripts/cv_z_segmentation.py
@@ -40,8 +40,7 @@
 import collections
 import numpy as np
 import torch
-import torchvision
-import torchvision.transforms as transforms
+from catalyst.contrib.data.dataset import Compose, Normalize
 from catalyst.data import Augmentor
 from catalyst.dl import utils
 from catalyst.contrib.nn.criterion import (
@@ -83,7 +82,7 @@ def get_loaders(transform):
     return loaders


-data_transform = transforms.Compose(
+data_transform = Compose(
     [
         Augmentor(
             dict_key="features",
@@ -91,10 +90,7 @@ def get_loaders(transform):
                 x.copy().astype(np.float32) / 255.0
             ).unsqueeze_(0),
         ),
-        Augmentor(
-            dict_key="features",
-            augment_fn=transforms.Normalize((0.5,), (0.5,)),
-        ),
+        Augmentor(dict_key="features", augment_fn=Normalize((0.5,), (0.5,)),),
         Augmentor(
             dict_key="targets",
             augment_fn=lambda x: torch.from_numpy(
@@ -205,7 +201,7 @@ def get_loaders(transform):
 # Multiclasses checks
 # lovasz LovaszLossMultiClass criterion

-data_transform = transforms.Compose(
+data_transform = Compose(
     [
         Augmentor(
             dict_key="features",
@@ -213,10 +209,7 @@ def get_loaders(transform):
                 x.copy().astype(np.float32) / 255.0
             ).unsqueeze_(0),
         ),
-        Augmentor(
-            dict_key="features",
-            augment_fn=transforms.Normalize((0.5,), (0.5,)),
-        ),
+        Augmentor(dict_key="features", augment_fn=Normalize((0.5,), (0.5,)),),
         Augmentor(
             dict_key="targets",
             augment_fn=lambda x: torch.from_numpy(
@@ -251,7 +244,7 @@ def transform_targets(x):
     return np.vstack([x1, x2]) / 255.0


-data_transform = transforms.Compose(
+data_transform = Compose(
     [
         Augmentor(
             dict_key="features",
@@ -259,10 +252,7 @@ def transform_targets(x):
                 x.copy().astype(np.float32) / 255.0
             ).unsqueeze_(0),
         ),
-        Augmentor(
-            dict_key="features",
-            augment_fn=transforms.Normalize((0.5,), (0.5,)),
-        ),
+        Augmentor(dict_key="features", augment_fn=Normalize((0.5,), (0.5,)),),
         Augmentor(
             dict_key="targets",
             augment_fn=lambda x: torch.from_numpy(transform_targets(x)),
diff --git a/tests/_tests_scripts/z_unets.py b/tests/_tests_scripts/cv_z_unets.py
similarity index 100%
rename from tests/_tests_scripts/z_unets.py
rename to tests/_tests_scripts/cv_z_unets.py
diff --git a/tests/_tests_scripts/z_classification.py b/tests/_tests_scripts/dl_z_classification.py
similarity index 96%
rename from tests/_tests_scripts/z_classification.py
rename to tests/_tests_scripts/dl_z_classification.py
index 8c39f670f4..97e98a23d0 100644
--- a/tests/_tests_scripts/z_classification.py
+++ b/tests/_tests_scripts/dl_z_classification.py
@@ -16,28 +16,24 @@
 import collections

 import torch
-import torchvision
-import torchvision.transforms as transforms
+
+from catalyst.contrib.data.dataset import MNIST, ToTensor, Compose, Normalize

 bs = 32
 num_workers = 0

-data_transform = transforms.Compose(
-    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
-)
+data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

 loaders = collections.OrderedDict()

-trainset = torchvision.datasets.MNIST(
+trainset = MNIST(
     "./data", train=False, download=True, transform=data_transform
 )
 trainloader = torch.utils.data.DataLoader(
     trainset, batch_size=bs, shuffle=True, num_workers=num_workers
 )

-testset = torchvision.datasets.MNIST(
-    "./data", train=False, download=True, transform=data_transform
-)
+testset = MNIST("./data", train=False, download=True, transform=data_transform)
 testloader = torch.utils.data.DataLoader(
     testset, batch_size=bs, shuffle=False, num_workers=num_workers
 )
diff --git a/tests/_tests_scripts/z_dl_contirb_functional.py b/tests/_tests_scripts/dl_z_contirb_functional.py
similarity index 100%
rename from tests/_tests_scripts/z_dl_contirb_functional.py
rename to tests/_tests_scripts/dl_z_contirb_functional.py
diff --git a/tests/_tests_scripts/z_distributed_01.py b/tests/_tests_scripts/dl_z_distributed_01.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_01.py
rename to tests/_tests_scripts/dl_z_distributed_01.py
diff --git a/tests/_tests_scripts/z_distributed_02.py b/tests/_tests_scripts/dl_z_distributed_02.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_02.py
rename to tests/_tests_scripts/dl_z_distributed_02.py
diff --git a/tests/_tests_scripts/z_distributed_03.py b/tests/_tests_scripts/dl_z_distributed_03.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_03.py
rename to tests/_tests_scripts/dl_z_distributed_03.py
diff --git a/tests/_tests_scripts/z_distributed_04.py b/tests/_tests_scripts/dl_z_distributed_04.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_04.py
rename to tests/_tests_scripts/dl_z_distributed_04.py
diff --git a/tests/_tests_scripts/z_distributed_05.py b/tests/_tests_scripts/dl_z_distributed_05.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_05.py
rename to tests/_tests_scripts/dl_z_distributed_05.py
diff --git a/tests/_tests_scripts/z_distributed_06.py b/tests/_tests_scripts/dl_z_distributed_06.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_06.py
rename to tests/_tests_scripts/dl_z_distributed_06.py
diff --git a/tests/_tests_scripts/z_distributed_07.py b/tests/_tests_scripts/dl_z_distributed_07.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_07.py
rename to tests/_tests_scripts/dl_z_distributed_07.py
diff --git a/tests/_tests_scripts/z_distributed_08.py b/tests/_tests_scripts/dl_z_distributed_08.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_08.py
rename to tests/_tests_scripts/dl_z_distributed_08.py
diff --git a/tests/_tests_scripts/z_distributed_09.py b/tests/_tests_scripts/dl_z_distributed_09.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_09.py
rename to tests/_tests_scripts/dl_z_distributed_09.py
diff --git a/tests/_tests_scripts/z_distributed_10.py b/tests/_tests_scripts/dl_z_distributed_10.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_10.py
rename to tests/_tests_scripts/dl_z_distributed_10.py
diff --git a/tests/_tests_scripts/z_distributed_11.py b/tests/_tests_scripts/dl_z_distributed_11.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_11.py
rename to tests/_tests_scripts/dl_z_distributed_11.py
diff --git a/tests/_tests_scripts/z_distributed_12.py b/tests/_tests_scripts/dl_z_distributed_12.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_12.py
rename to tests/_tests_scripts/dl_z_distributed_12.py
diff --git a/tests/_tests_scripts/z_distributed_13.py b/tests/_tests_scripts/dl_z_distributed_13.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_13.py
rename to tests/_tests_scripts/dl_z_distributed_13.py
diff --git a/tests/_tests_scripts/z_distributed_14.py b/tests/_tests_scripts/dl_z_distributed_14.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_14.py
rename to tests/_tests_scripts/dl_z_distributed_14.py
diff --git a/tests/_tests_scripts/z_distributed_15.py b/tests/_tests_scripts/dl_z_distributed_15.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_15.py
rename to tests/_tests_scripts/dl_z_distributed_15.py
diff --git a/tests/_tests_scripts/z_distributed_16.py b/tests/_tests_scripts/dl_z_distributed_16.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_16.py
rename to tests/_tests_scripts/dl_z_distributed_16.py
diff --git a/tests/_tests_scripts/z_distributed_17.py b/tests/_tests_scripts/dl_z_distributed_17.py
similarity index 100%
rename from tests/_tests_scripts/z_distributed_17.py
rename to tests/_tests_scripts/dl_z_distributed_17.py
diff --git a/tests/_tests_scripts/z_docs_distributed_0.py b/tests/_tests_scripts/dl_z_docs_distributed_0.py
similarity index 100%
rename from tests/_tests_scripts/z_docs_distributed_0.py
rename to tests/_tests_scripts/dl_z_docs_distributed_0.py
diff --git a/tests/_tests_scripts/z_docs_distributed_1.py b/tests/_tests_scripts/dl_z_docs_distributed_1.py
similarity index 100%
rename from tests/_tests_scripts/z_docs_distributed_1.py
rename to tests/_tests_scripts/dl_z_docs_distributed_1.py
diff --git a/tests/_tests_scripts/z_docs_distributed_2.py b/tests/_tests_scripts/dl_z_docs_distributed_2.py
similarity index 100%
rename from tests/_tests_scripts/z_docs_distributed_2.py
rename to tests/_tests_scripts/dl_z_docs_distributed_2.py
diff --git a/tests/_tests_scripts/z_docs_distributed_3.py b/tests/_tests_scripts/dl_z_docs_distributed_3.py
similarity index 100%
rename from tests/_tests_scripts/z_docs_distributed_3.py
rename to tests/_tests_scripts/dl_z_docs_distributed_3.py
diff --git a/tests/_tests_scripts/z_mvp_distributed_mnist_ae.py b/tests/_tests_scripts/dl_z_mvp_distributed_mnist_ae.py
similarity index 93%
rename from tests/_tests_scripts/z_mvp_distributed_mnist_ae.py
rename to tests/_tests_scripts/dl_z_mvp_distributed_mnist_ae.py
index d55479fda6..7071a819c8 100644
--- a/tests/_tests_scripts/z_mvp_distributed_mnist_ae.py
+++ b/tests/_tests_scripts/dl_z_mvp_distributed_mnist_ae.py
@@ -4,10 +4,9 @@
 import torch
 from torch import nn
 from torch.nn import functional as F
-from torchvision import transforms
-from torchvision.datasets import MNIST

 from catalyst import dl, utils
+from catalyst.contrib.data.dataset import MNIST, ToTensor
 from catalyst.utils import metrics
@@ -78,10 +77,7 @@ def datasets_fn():
     Docs.
     """
     dataset = MNIST(
-        os.getcwd(),
-        train=False,
-        download=True,
-        transform=transforms.ToTensor(),
+        os.getcwd(), train=False, download=True, transform=ToTensor(),
     )
     return {"train": dataset, "valid": dataset}

diff --git a/tests/_tests_scripts/z_mvp_mnist.py b/tests/_tests_scripts/dl_z_mvp_mnist.py
similarity index 95%
rename from tests/_tests_scripts/z_mvp_mnist.py
rename to tests/_tests_scripts/dl_z_mvp_mnist.py
index 021ba2d6d6..0c0d4c243d 100644
--- a/tests/_tests_scripts/z_mvp_mnist.py
+++ b/tests/_tests_scripts/dl_z_mvp_mnist.py
@@ -4,10 +4,9 @@
 import torch
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
-from torchvision.datasets import MNIST
-from torchvision.transforms import ToTensor

 from catalyst import dl
+from catalyst.contrib.data.dataset import MNIST, ToTensor
 from catalyst.utils import metrics

 model = torch.nn.Linear(28 * 28, 10)
diff --git a/tests/_tests_scripts/z_mvp_mnist_ae.py b/tests/_tests_scripts/dl_z_mvp_mnist_ae.py
similarity index 86%
rename from tests/_tests_scripts/z_mvp_mnist_ae.py
rename to tests/_tests_scripts/dl_z_mvp_mnist_ae.py
index 3188a06b2d..e9d0a76420 100644
--- a/tests/_tests_scripts/z_mvp_mnist_ae.py
+++ b/tests/_tests_scripts/dl_z_mvp_mnist_ae.py
@@ -5,10 +5,9 @@
 from torch import nn
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
-from torchvision import transforms
-from torchvision.datasets import MNIST

 from catalyst import dl
+from catalyst.contrib.data.dataset import MNIST, ToTensor
 from catalyst.utils import metrics


@@ -64,19 +63,13 @@ def main():
     loaders = {
         "train": DataLoader(
             MNIST(
-                os.getcwd(),
-                train=False,
-                download=True,
-                transform=transforms.ToTensor(),
+                os.getcwd(), train=False, download=True, transform=ToTensor(),
             ),
             batch_size=32,
         ),
         "valid": DataLoader(
             MNIST(
-                os.getcwd(),
-                train=False,
-                download=True,
-                transform=transforms.ToTensor(),
+                os.getcwd(), train=False, download=True, transform=ToTensor(),
             ),
             batch_size=32,
         ),
diff --git a/tests/_tests_scripts/z_mvp_mnist_gan.py b/tests/_tests_scripts/dl_z_mvp_mnist_gan.py
similarity index 95%
rename from tests/_tests_scripts/z_mvp_mnist_gan.py
rename to tests/_tests_scripts/dl_z_mvp_mnist_gan.py
index 1e667b47aa..9dca480661 100644
--- a/tests/_tests_scripts/z_mvp_mnist_gan.py
+++ b/tests/_tests_scripts/dl_z_mvp_mnist_gan.py
@@ -5,10 +5,9 @@
 from torch import nn
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
-from torchvision import transforms
-from torchvision.datasets import MNIST

 from catalyst import dl
+from catalyst.contrib.data.dataset import MNIST, ToTensor
 from catalyst.contrib.nn.modules import Flatten, GlobalMaxPool2d, Lambda

 LATENT_DIM = 128
@@ -97,10 +96,7 @@ def main():
     loaders = {
         "train": DataLoader(
             MNIST(
-                os.getcwd(),
-                train=True,
-                download=True,
-                transform=transforms.ToTensor(),
+                os.getcwd(), train=True, download=True, transform=ToTensor(),
             ),
             batch_size=32,
         ),
diff --git a/tests/_tests_scripts/z_mvp_mnist_unet.py b/tests/_tests_scripts/dl_z_mvp_mnist_unet.py
similarity index 86%
rename from tests/_tests_scripts/z_mvp_mnist_unet.py
rename to tests/_tests_scripts/dl_z_mvp_mnist_unet.py
index bef3385209..f091e7735c 100644
--- a/tests/_tests_scripts/z_mvp_mnist_unet.py
+++ b/tests/_tests_scripts/dl_z_mvp_mnist_unet.py
@@ -5,10 +5,9 @@
 from torch import nn
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
-from torchvision import transforms
-from torchvision.datasets import MNIST

 from catalyst import dl
+from catalyst.contrib.data.dataset import MNIST, ToTensor
 from catalyst.utils import metrics


@@ -66,19 +65,13 @@ def main():
     loaders = {
         "train": DataLoader(
             MNIST(
-                os.getcwd(),
-                train=False,
-                download=True,
-                transform=transforms.ToTensor(),
+                os.getcwd(), train=False, download=True, transform=ToTensor(),
             ),
             batch_size=32,
         ),
         "valid": DataLoader(
             MNIST(
-                os.getcwd(),
-                train=False,
-                download=True,
-                transform=transforms.ToTensor(),
+                os.getcwd(), train=False, download=True, transform=ToTensor(),
             ),
             batch_size=32,
         ),
diff --git a/tests/_tests_scripts/z_mvp_mnist_vae.py b/tests/_tests_scripts/dl_z_mvp_mnist_vae.py
similarity index 90%
rename from tests/_tests_scripts/z_mvp_mnist_vae.py
rename to tests/_tests_scripts/dl_z_mvp_mnist_vae.py
index e60f587f9c..c44b5c53ec 100644
--- a/tests/_tests_scripts/z_mvp_mnist_vae.py
+++ b/tests/_tests_scripts/dl_z_mvp_mnist_vae.py
@@ -7,10 +7,9 @@
 from torch import nn
 from torch.nn import functional as F
 from torch.utils.data import DataLoader
-from torchvision import transforms
-from torchvision.datasets import MNIST

 from catalyst import dl
+from catalyst.contrib.data.dataset import MNIST, ToTensor
 from catalyst.utils import metrics

 LOG_SCALE_MAX = 2
@@ -104,19 +103,13 @@ def main():
     loaders = {
         "train": DataLoader(
             MNIST(
-                os.getcwd(),
-                train=False,
-                download=True,
-                transform=transforms.ToTensor(),
+                os.getcwd(), train=False, download=True, transform=ToTensor(),
             ),
             batch_size=32,
         ),
         "valid": DataLoader(
             MNIST(
-                os.getcwd(),
-                train=False,
-                download=True,
-                transform=transforms.ToTensor(),
+                os.getcwd(), train=False, download=True, transform=ToTensor(),
             ),
             batch_size=32,
         ),
diff --git a/tests/_tests_scripts/z_mvp_projector.py b/tests/_tests_scripts/dl_z_mvp_projector.py
similarity index 100%
rename from tests/_tests_scripts/z_mvp_projector.py
rename to tests/_tests_scripts/dl_z_mvp_projector.py
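
Usage sketch (illustrative, not part of the diff): after this patch the MNIST test
scripts import their dataset and transforms from catalyst.contrib.data.dataset
instead of torchvision. A minimal end-to-end example of the migrated API, using the
same (0.1307,), (0.3081,) normalization constants and the MNIST(root, train=,
download=, transform=) signature seen in the hunks above; the batch size and data
path are arbitrary:

    from torch.utils.data import DataLoader

    from catalyst.contrib.data.dataset import Compose, MNIST, Normalize, ToTensor

    # same transform pipeline as the updated ConfigExperiment.get_transforms
    data_transform = Compose([ToTensor(), Normalize((0.1307,), (0.3081,))])

    # train=False mirrors the test scripts above (they load the small split)
    dataset = MNIST("./data", train=False, download=True, transform=data_transform)
    loader = DataLoader(dataset, batch_size=32, shuffle=False)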