
Merge pull request #206 from raphaelrpl/feature/refactor
Review Sphinx documentation according to cookiecutter-bdc and prepare to release 0.6.0
raphaelrpl authored Dec 3, 2020
2 parents a316c54 + 79ef93d commit 6bf8933
Showing 39 changed files with 388 additions and 170 deletions.
2 changes: 1 addition & 1 deletion .dockerignore
@@ -69,7 +69,7 @@ instance/
 .scrapy
 
 # Sphinx documentation
-docs/_build/
+docs/sphinx/_build/
 
 # PyBuilder
 target/
2 changes: 1 addition & 1 deletion .gitignore
@@ -69,7 +69,7 @@ instance/
 .scrapy
 
 # Sphinx documentation
-docs/_build/
+docs/sphinx/_build/
 
 # PyBuilder
 target/
30 changes: 19 additions & 11 deletions CHANGES.rst
@@ -10,10 +10,22 @@
 Changes
 =======
 
-Version 0.4.1
--------------
-
-Released 2020-09-08
+Version 0.6.0 (2020-12-03)
+--------------------------
+
+- Fix ``provider_id`` not being saved in ``bdc.item`` - `#202 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/202>`_, `#140 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/140>`_.
+- Fix publish item transaction error - `#87 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/87>`_.
+- Fix wrong thumbnail path for Landsat products - `#180 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/180>`_.
+- Add support for `BDC-Catalog 0.6.4 <http://bdc-catalog.readthedocs.io/>`_ - `#174 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/174>`_.
+- Add support for changing the default collection name - `#182 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/182>`_.
+- Add support for `BDC-Collectors <https://github.com/brazil-data-cube/bdc-collectors>`_ to search and collect data products from different providers - `PR 187 <https://github.com/brazil-data-cube/bdc-collection-builder/pull/187>`_.
+- Add Harmonization support (installed with the extras ``pip install -e .[harmonization]``) - `#138 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/138>`_.
+- Generate vegetation band indexes dynamically, relying on ``bdc.bands.metadata`` - `#164 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/164>`_, commit `0fe15de <https://github.com/brazil-data-cube/bdc-collection-builder/commit/0fe15debceb912144a995d82eb68a7a2b1595340>`_.
+
+
+Version 0.4.1 (2020-09-08)
+--------------------------
 
 Bug fixes:
 
@@ -24,10 +36,8 @@ Changes:
 - Data synchronization with Amazon Simple Storage (S3) - `#170 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/170>`_
 
 
-Version 0.4.0
--------------
-
-Released 2020-08-25
+Version 0.4.0 (2020-08-25)
+--------------------------
 
 - Add `LaSRC 2.0 <https://github.com/USGS-EROS/espa-surface-reflectance>`_ with `FMask 4.2 <https://github.com/GERSL/Fmask>`_ on collections Landsat-8 and Sentinel-2 - `#156 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/156>`_
 - Fix wrong band name "quality" for Landsat 8 using LaSRC and Fmask - `#162 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/162>`_
@@ -40,10 +50,8 @@ Released 2020-08-25
 - Restart a task by scene_id, status
 
 
-Version 0.2.0
--------------
-
-Released 2020-04-29
+Version 0.2.0 (2020-04-29)
+--------------------------
 
 - First experimental version.
 - Metadata ingestion of Image Collections.
2 changes: 1 addition & 1 deletion INSTALL.rst
@@ -84,7 +84,7 @@ Generate the documentation::
 
 The above command will generate the documentation in HTML and it will place it under::
 
-    doc/sphinx/_build/html/
+    docs/sphinx/_build/html/
 
 
 Running in Development Mode
16 changes: 9 additions & 7 deletions MANIFEST.in
@@ -6,7 +6,7 @@
 # under the terms of the MIT License; see LICENSE file for more details.
 #
 
-exclude doc/sphinx/_build
+exclude docs/sphinx/_build
 exclude migrations
 exclude espa-science
 exclude sen2cor
@@ -21,16 +21,18 @@ include *.txt
 include LICENSE
 include pytest.ini
 include alembic.ini
-recursive-exclude doc/sphinx/_build *
+recursive-exclude docs/sphinx/_build *
 recursive-exclude migrations *
 recursive-exclude sen2cor *
 recursive-exclude espa-science *
 recursive-exclude docker *
 recursive-include bdc_collection_builder *.py
-recursive-include doc *.bat
-recursive-include doc *.css
-recursive-include doc *.py
-recursive-include doc *.rst
-recursive-include doc Makefile
+recursive-include docs *.bat
+recursive-include docs *.css
+recursive-include docs *.py
+recursive-include docs *.rst
+recursive-include docs *.ico
+recursive-include docs *.png
+recursive-include docs Makefile
 recursive-include examples *.py
 recursive-include tests *.py
2 changes: 1 addition & 1 deletion USING.rst
@@ -39,7 +39,7 @@ The resource `/api/radcor` is used to dispatch tasks for both data collect, proc
 if you need to `download` data, generate a Surface Reflectance product (`correction`) and then publish it (`publish`),
 you can chain the tasks as follows:
 
-.. code-block:: txt
+.. code-block::
 
     "tasks": [
       "type": "download",
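For context, a complete ``/api/radcor`` dispatch might look like the sketch below. The payload schema is abridged in this diff, so every field value here — the bounding-box keys, the catalog/dataset names, the collection names, and the endpoint URL — is an illustrative assumption rather than the documented API:

.. code-block:: python

    # A sketch of chaining download -> correction -> publish through the
    # /api/radcor resource. All values below are illustrative assumptions;
    # see USING.rst in the repository for the documented payload.
    import requests

    payload = {
        'w': -54.0, 's': -12.0, 'e': -53.0, 'n': -11.0,  # assumed bounding-box keys
        'catalog': 'USGS',                               # assumed provider name
        'dataset': 'LANDSAT_8_C1',                       # assumed dataset identifier
        'tasks': [
            {
                'type': 'download',
                'collection': 'LC8_DN',                  # hypothetical collection
                'tasks': [
                    {
                        'type': 'correction',
                        'collection': 'LC8_SR',          # hypothetical collection
                        'tasks': [
                            {'type': 'publish', 'collection': 'LC8_SR'}
                        ],
                    }
                ],
            }
        ],
    }

    # Assumes a local deployment of the collection builder API.
    response = requests.post('http://localhost:5000/api/radcor', json=payload)
    response.raise_for_status()
    print(response.json())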
4 changes: 3 additions & 1 deletion bdc_collection_builder/alembic/06fab6583881_activities.py
@@ -1,4 +1,4 @@
-"""activities
+"""activities.
 
 Revision ID: 06fab6583881
 Revises:
@@ -17,6 +17,7 @@
 
 
 def upgrade():
+    """Upgrade alembic migration version."""
     # ### commands auto generated by Alembic - please adjust! ###
     op.create_table('activities',
     sa.Column('id', sa.Integer(), nullable=False),
@@ -48,6 +49,7 @@ def upgrade():
 
 
 def downgrade():
+    """Downgrade alembic migration version."""
     # ### commands auto generated by Alembic - please adjust! ###
     op.drop_table('activity_history', schema='collection_builder')
     op.drop_table('activities', schema='collection_builder')
13 changes: 12 additions & 1 deletion bdc_collection_builder/alembic/64c2a4bb18e1_activity_children.py
@@ -1,10 +1,19 @@
-"""activity_children
+#
+# This file is part of Brazil Data Cube Collection Builder.
+# Copyright (C) 2019-2020 INPE.
+#
+# Brazil Data Cube Collection Builder is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+#
+
+"""activity_children.
 
 Revision ID: 64c2a4bb18e1
 Revises:
 Create Date: 2020-10-03 23:20:32.253182
 """
 
 from alembic import op
 import sqlalchemy as sa
 
@@ -17,6 +26,7 @@
 
 
 def upgrade():
+    """Upgrade alembic migration version."""
     # ### commands auto generated by Alembic - please adjust! ###
     op.create_table('activity_src',
     sa.Column('activity_id', sa.Integer(), nullable=False),
@@ -50,6 +60,7 @@ def upgrade():
 
 
 def downgrade():
+    """Downgrade alembic migration version."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.alter_column('activities', 'sceneid',
        existing_type=sa.String(length=255),
41 changes: 26 additions & 15 deletions bdc_collection_builder/celery/publish.py
@@ -1,3 +1,13 @@
+#
+# This file is part of Brazil Data Cube Collection Builder.
+# Copyright (C) 2019-2020 INPE.
+#
+# Brazil Data Cube Collection Builder is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+#
+
+"""Module to publish a collection item on the database."""
+
 import logging
 import mimetypes
 import shutil
@@ -22,6 +32,7 @@
 
 
 def guess_mime_type(extension: str, cog=False) -> Optional[str]:
+    """Try to identify the file mimetype."""
     mime = mimetypes.guess_type(extension)
 
     if mime[0] in COG_MIME_TYPE and cog:
@@ -33,21 +44,21 @@ def guess_mime_type(extension: str, cog=False) -> Optional[str]:
 def create_quick_look(file_output, red_file, green_file, blue_file, rows=768, cols=768, no_data=-9999):
     """Generate a Quick Look file (PNG based) from a list of files.
-    Note:
-    The file order in ``files`` represents the bands Red, Green and Blue, respectively.
-    Exceptions:
-    RasterIOError when could not open a raster file band
-    Args:
-    file_output: Path to store the quicklook file.
-    red_file: Path to the band attached into red channel.
-    green_file: Path to the band attached into green channel.
-    blue_file: Path to the band attached into blue channel.
-    rows: Image height. Default is 768.
-    cols: Image width. Default is 768.
-    no_data: Use custom value for nodata.
-    """
+    Note:
+        The file order in ``files`` represents the bands Red, Green and Blue, respectively.
+    Exceptions:
+        RasterIOError when a raster file band could not be opened.
+    Args:
+        file_output: Path to store the quicklook file.
+        red_file: Path to the band attached to the red channel.
+        green_file: Path to the band attached to the green channel.
+        blue_file: Path to the band attached to the blue channel.
+        rows: Image height. Default is 768.
+        cols: Image width. Default is 768.
+        no_data: Use custom value for nodata.
+    """
     image = numpy.zeros((rows, cols, 3,), dtype=numpy.uint8)
 
     nb = 0
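The docstring above summarizes the interface; as a rough illustration of what such a routine does, the sketch below builds an RGB PNG from three single-band rasters with rasterio. It is a simplified, assumption-laden rendition, not the module's actual implementation (which is folded out of this diff and handles nodata differently):

.. code-block:: python

    # A simplified sketch of a quicklook routine in the spirit of
    # create_quick_look(). Illustrative only.
    import numpy
    import rasterio
    from rasterio.enums import Resampling

    def quicklook_sketch(file_output, red_file, green_file, blue_file, rows=768, cols=768):
        image = numpy.zeros((rows, cols, 3), dtype=numpy.uint8)

        for channel, path in enumerate([red_file, green_file, blue_file]):
            with rasterio.open(path) as dataset:
                # Read band 1 resampled to the quicklook shape.
                raster = dataset.read(
                    1, out_shape=(rows, cols), resampling=Resampling.bilinear
                ).astype('float64')

            # Naive 2-98 percentile contrast stretch over valid pixels.
            valid = raster[raster > 0]
            if valid.size:
                low, high = numpy.percentile(valid, (2, 98))
                raster = (raster - low) * 255.0 / max(high - low, 1.0)

            image[:, :, channel] = numpy.clip(raster, 0, 255).astype(numpy.uint8)

        # Write the RGB composite as a 3-band PNG.
        with rasterio.open(file_output, 'w', driver='PNG', dtype='uint8',
                           width=cols, height=rows, count=3) as dst:
            dst.write(image.transpose(2, 0, 1))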
39 changes: 31 additions & 8 deletions bdc_collection_builder/celery/tasks.py
@@ -1,3 +1,13 @@
+#
+# This file is part of Brazil Data Cube Collection Builder.
+# Copyright (C) 2019-2020 INPE.
+#
+# Brazil Data Cube Collection Builder is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+#
+
+"""Module to deal with Celery Tasks."""
+
 import logging
 import os
 import shutil
@@ -80,6 +90,7 @@ def execution_from_collection(activity, collection_id=None, activity_type=None):
 
 
 def get_provider_collection(provider_name: str, dataset: str) -> BaseCollection:
+    """Retrieve a data collector class from the given bdc-collector provider."""
     collector_extension = flask_app.extensions['bdc:collector']
 
     provider_class = collector_extension.get_provider(provider_name)
@@ -103,6 +114,7 @@ def get_provider_collection(provider_name: str, dataset: str) -> BaseCollection:
 
 
 def get_provider_collection_from_activity(activity: dict) -> BaseCollection:
+    """Retrieve an instance of bdc_collectors.base.BaseCollection."""
     return get_provider_collection(activity['args']['catalog'], activity['args']['dataset'])(activity['sceneid'])
 

@@ -123,6 +135,7 @@ def refresh_execution_args(execution: RadcorActivityHistory, activity: dict, **k
     default_retry_delay=Config.TASK_RETRY_DELAY
 )
 def download(activity: dict, **kwargs):
+    """Celery task to download a data product from the given provider."""
     execution = create_execution(activity)
 
     collector_extension = flask_app.extensions['bdc:collector']
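The structure of the ``activity`` dict these tasks consume is only partially visible here. Pieced together from the keys the diff actually reads (``sceneid`` and the ``args`` entries ``catalog``, ``dataset``, ``file``, ``compressed_file``), a download activity presumably resembles the following sketch; every value is invented for illustration:

.. code-block:: python

    # Hypothetical shape of an `activity` payload, inferred only from the
    # keys referenced by the tasks shown in this diff. Values are invented.
    activity = {
        'sceneid': 'LC08_L1TP_223064_20200101_20200113_01_T1',  # illustrative scene id
        'activity_type': 'download',                            # key assumed
        'args': {
            'catalog': 'USGS',          # provider name resolved by bdc-collectors (assumed)
            'dataset': 'LANDSAT_8_C1',  # provider dataset identifier (assumed)
            'file': '/data/Repository/Archive/LC8DN/2020-01/scene_dir',  # assumed layout
            'compressed_file': '/data/Repository/Archive/scene.tar.gz',  # assumed layout
        },
    }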
@@ -197,6 +210,7 @@ def download(activity: dict, **kwargs):
 
 @current_app.task(queue='correction')
 def correction(activity: dict, collection_id=None, **kwargs):
+    """Celery task to deal with Surface Reflectance processors."""
     execution = execution_from_collection(activity, collection_id=collection_id, activity_type=correction.__name__)
 
     collection: Collection = execution.activity.collection
@@ -290,6 +304,7 @@ def correction(activity: dict, collection_id=None, **kwargs):
 
 @current_app.task(queue='publish')
 def publish(activity: dict, collection_id=None, **kwargs):
+    """Celery task to publish an item to the database."""
     execution = execution_from_collection(activity, collection_id=collection_id, activity_type=publish.__name__)
 
     collection = execution.activity.collection
@@ -320,6 +335,7 @@ def publish(activity: dict, collection_id=None, **kwargs):
 
 @current_app.task(queue='post')
 def post(activity: dict, collection_id=None, **kwargs):
+    """Celery task to deal with data post-processing."""
     execution = execution_from_collection(activity, collection_id=collection_id, activity_type=post.__name__)
 
     collection = execution.activity.collection
@@ -379,6 +395,14 @@ def harmonization(activity: dict, collection_id=None, **kwargs):
 
     target_tmp_dir.mkdir(exist_ok=True, parents=True)
 
+    reflectance_dir = Path(activity['args']['file'])
+
+    glob = list(reflectance_dir.glob(f'**/{activity["sceneid"]}_Fmask4.tif'))
+
+    fmask = glob[0]
+
+    shutil.copy(str(fmask), target_tmp_dir)
+
     if activity['sceneid'].startswith('S2'):
         shutil.unpack_archive(activity['args']['compressed_file'], tmp)
 
@@ -395,19 +419,18 @@
         product_version = int(data_collection.parser.satellite())
         sat_sensor = '{}{}'.format(data_collection.parser.source()[:2], product_version)
 
-        landsat_harmonize(sat_sensor, activity['args']['file'], str(target_tmp_dir))
+        landsat_harmonize(sat_sensor, activity["sceneid"], activity['args']['file'], str(target_tmp_dir))
 
-    reflectance_dir = Path(activity['args']['file'])
-
-    glob = list(reflectance_dir.glob('**/*_Fmask4.tif'))
-
-    fmask = glob[0]
-
-    shutil.copy(str(fmask), target_tmp_dir)
-
     Path(target_dir).mkdir(exist_ok=True, parents=True)
 
     for entry in Path(target_tmp_dir).iterdir():
-        shutil.move(str(entry), target_dir)
+        entry_name = entry.name
+
+        target_entry = Path(target_dir) / entry_name
+
+        if target_entry.exists():
+            os.remove(str(target_entry))
+
+        shutil.move(str(entry), target_dir)
 
     activity['args']['file'] = target_dir
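The rewritten loop above works around a real pitfall: ``shutil.move`` raises ``shutil.Error`` when the destination directory already contains an entry with the same name, so a re-run of the task would fail on its own previous outputs. The same overwrite-safe pattern in isolation (``replace_into`` is a hypothetical helper name, not part of the codebase):

.. code-block:: python

    # Standalone sketch of the overwrite-safe move used by the harmonization
    # task: remove any stale entry in the target before moving, because
    # shutil.move() refuses to overwrite an existing destination entry.
    import os
    import shutil
    from pathlib import Path

    def replace_into(src_dir: str, dst_dir: str) -> None:
        Path(dst_dir).mkdir(exist_ok=True, parents=True)

        for entry in Path(src_dir).iterdir():
            target_entry = Path(dst_dir) / entry.name

            if target_entry.exists():
                # Assumes flat files, as in the task above; directories
                # would need shutil.rmtree() instead.
                os.remove(str(target_entry))

            shutil.move(str(entry), dst_dir)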
1 change: 1 addition & 0 deletions bdc_collection_builder/celery/utils.py
@@ -27,6 +27,7 @@ def list_pending_tasks():
 
 
 def load_celery_models():
+    """Prepare and load the Celery models in the database backend."""
     session = SessionManager()
     engine = session.get_engine(current_app.backend.url)
     session.prepare_models(engine)
1 change: 1 addition & 0 deletions bdc_collection_builder/cli.py
@@ -31,6 +31,7 @@ def cli():
 @click.pass_context
 @with_appcontext
 def create_namespaces(ctx):
+    """Create all namespaces used in BDC-Collection-Builder and BDC-Catalog."""
     warnings.simplefilter('always', DeprecationWarning)
     warnings.warn(
         '\nThis command line utility is deprecated.'
14 changes: 14 additions & 0 deletions bdc_collection_builder/collections/index_generator.py
@@ -1,3 +1,13 @@
+#
+# This file is part of Brazil Data Cube Collection Builder.
+# Copyright (C) 2019-2020 INPE.
+#
+# Brazil Data Cube Collection Builder is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+#
+
+"""Module to generate collection bands dynamically using the bdc.bands.metadata property."""
+
 import logging
 from pathlib import Path
 from typing import List, Dict
@@ -18,17 +28,21 @@ class AutoCloseDataSet:
     """Class that wraps rasterio.io.Dataset to auto-close the data set when out of scope."""
 
     def __init__(self, file_path: str, mode='r', **options):
+        """Build an auto-close dataset instance."""
         self.dataset = rasterio.open(str(file_path), mode=mode, **options)
         self.profile = options
 
     def close(self):
+        """Try to close the data set."""
         if self.dataset:
             self.dataset.close()
 
     def __del__(self):
+        """Destructor that closes the data set before the object is destroyed."""
         self.close()
 
     def __exit__(self, exc_type, exc_val, exc_tb):
+        """Close the data set when the object goes out of scope."""
         self.close()
 
 
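As a rough usage sketch of this wrapper: the hunk shows ``__exit__`` but any ``__enter__`` is folded out of the diff, so its presence below is an assumption, and the file paths and NDVI-style expression are purely illustrative of the dynamic index generation this module performs:

.. code-block:: python

    # Illustrative only: how an auto-closing dataset wrapper such as
    # AutoCloseDataSet can back a dynamically generated index band.
    import numpy
    import rasterio

    class AutoCloseDataSet:
        """Minimal restatement of the wrapper shown in the diff."""

        def __init__(self, file_path: str, mode='r', **options):
            self.dataset = rasterio.open(str(file_path), mode=mode, **options)
            self.profile = options

        def close(self):
            if self.dataset:
                self.dataset.close()
                self.dataset = None  # guard against double-close from __del__

        def __enter__(self):  # assumed; not visible in the hunk above
            return self.dataset

        def __exit__(self, exc_type, exc_val, exc_tb):
            self.close()

        def __del__(self):
            self.close()

    # Hypothetical NDVI-like band expression (illustrative paths), in the
    # spirit of indexes driven by ``bdc.bands.metadata`` expressions.
    with AutoCloseDataSet('red.tif') as red, AutoCloseDataSet('nir.tif') as nir:
        red_arr = red.read(1).astype('float32')
        nir_arr = nir.read(1).astype('float32')
        ndvi = (nir_arr - red_arr) / numpy.clip(nir_arr + red_arr, 1e-6, None)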