diff --git a/CHANGES.rst b/CHANGES.rst
index 8b06e64..16c8979 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -11,6 +11,16 @@ Changes
 =======
 
+Version 0.8.0 (2021-05-04)
+--------------------------
+
+- Add route resource to check published scenes `225 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/225>`_.
+- Add support to publish MODIS Cloud Optimized GeoTIFF (COG) data `221 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/221>`_.
+- Add support to publish MODIS as HDF item `231 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/231>`_.
+- Change default compression type to deflate on COG files `227 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/227>`_.
+- Add support to publish Landsat-8 Collection 2 `220 <https://github.com/brazil-data-cube/bdc-collection-builder/issues/220>`_.
+
+
 Version 0.6.1 (2021-01-21)
 --------------------------
 
diff --git a/bdc_collection_builder/celery/publish.py b/bdc_collection_builder/celery/publish.py
index 0d80786..17d9dbf 100644
--- a/bdc_collection_builder/celery/publish.py
+++ b/bdc_collection_builder/celery/publish.py
@@ -32,6 +32,7 @@ from ..collections.utils import (create_asset_definition, generate_cogs,
                                  get_or_create_model, raster_convexhull,
                                  raster_extent)
+from ..config import Config
 from ..constants import COG_MIME_TYPE
 
@@ -103,8 +104,8 @@ def compress_raster(input_path: str, output_path: str, algorithm: str = 'lzw'):
         shutil.move(tmp_file, output_path)
 
 
-def _asset_definition(path, band=None, is_raster=False, cog=False):
-    href = _item_prefix(path)
+def _asset_definition(path, band=None, is_raster=False, cog=False, **options):
+    href = _item_prefix(path, **options)
 
     if band and band.mime_type:
         mime_type = band.mime_type.name
@@ -120,13 +121,19 @@ def _asset_definition(path, band=None, is_raster=False, cog=False):
     )
 
 
-def _item_prefix(path: Path) -> str:
+def _item_prefix(path: Path, prefix=None, item_prefix=None) -> str:
     """Retrieve the bdc_catalog.models.Item prefix used in assets."""
-    href = f'/{str(path.relative_to(current_app.config["DATA_DIR"]))}'
+    if prefix is None:
+        prefix = current_app.config["DATA_DIR"]
+
+    href = f'/{str(path.relative_to(prefix))}'
 
     if current_app.config['USE_BUCKET_PREFIX']:
         return href.replace('/Repository/Archive/', current_app.config['AWS_BUCKET_NAME'])
 
+    if item_prefix:
+        href = href.replace('/Repository', item_prefix)
+
     return href
 
 
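The reworked _item_prefix above accepts two new keywords: prefix, an alternate filesystem root to relativize against (falling back to DATA_DIR), and item_prefix, a public root that replaces the leading /Repository segment. A minimal standalone sketch of the resulting hrefs, using hypothetical paths and leaving out the USE_BUCKET_PREFIX branch:

    from pathlib import Path

    def build_href(path: Path, prefix: str, item_prefix: str = None) -> str:
        # Mirror of the patched _item_prefix, minus the Flask config lookups.
        href = f'/{path.relative_to(prefix)}'
        if item_prefix:
            href = href.replace('/Repository', item_prefix)
        return href

    # Default behaviour, unchanged: href relative to DATA_DIR.
    build_href(Path('/gfs/Repository/Archive/LC08/scene/B4.tif'), prefix='/gfs')
    # -> '/Repository/Archive/LC08/scene/B4.tif'

    # Cube assets: relative to CUBES_DATA_DIR, re-rooted at CUBES_ITEM_PREFIX.
    build_href(Path('/cubes/Repository/MOD13Q1/scene/NDVI.tif'),
               prefix='/cubes', item_prefix='/data/d006')
    # -> '/data/d006/MOD13Q1/scene/NDVI.tif'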
@@ -161,7 +168,7 @@ def get_footprint_sentinel(mtd_file: str) -> shapely.geometry.Polygon:
 
 
 def publish_collection(scene_id: str, data: BaseCollection, collection: Collection, file: str,
-                       cloud_cover=None, provider_id: Optional[int] = None) -> Item:
+                       cloud_cover=None, provider_id: Optional[int] = None, **kwargs) -> Item:
     """Generate the Cloud Optimized Files for Image Collection and publish meta information in database.
 
     Notes:
@@ -183,6 +190,7 @@ def publish_collection(scene_id: str, data: BaseCollection, collection: Collecti
     file_band_map = dict()
     assets = dict()
     old_file_path = file
+    asset_item_prefix = prefix = None
 
     temporary_dir = TemporaryDirectory()
 
@@ -243,17 +251,54 @@ def publish_collection(scene_id: str, data: BaseCollection, collection: Collecti
     else:
         destination.mkdir(parents=True, exist_ok=True)
 
+    tile_id = data.parser.tile_id()
+
     if file.endswith('.hdf'):
         from ..collections.hdf import to_geotiff
 
+        opts = dict(prefix=Config.CUBES_DATA_DIR)
+
+        if kwargs.get('publish_hdf'):
+            opts['prefix'] = Config.DATA_DIR
+            opts['cube_prefix'] = 'Mosaic'
+        else:
+            asset_item_prefix = Config.CUBES_ITEM_PREFIX
+            prefix = Config.CUBES_DATA_DIR
+
+            tile_id = tile_id.replace('h', '0').replace('v', '0')
+
+        destination = data.path(collection, **opts)
         destination.mkdir(parents=True, exist_ok=True)
 
         item_result = to_geotiff(file, temporary_dir.name)
 
         files = dict()
-        for _band, _geotiff in item_result.files.items():
-            destination_path = destination / Path(_geotiff).name
-            shutil.move(str(_geotiff), str(destination_path))
-            files[_band] = destination_path
+        if item_result.files:
+            ref = list(item_result.files.values())[0]
+
+            geom = from_shape(raster_extent(str(ref)), srid=4326)
+
+            with rasterio.open(ref) as d:
+                nodata = d.profile.get('nodata')
+
+            # Trust in band metadata (no data)
+            convex_hull = raster_convexhull(str(ref), no_data=nodata)
+
+            if convex_hull.area > 0.0:
+                convex_hull = from_shape(convex_hull, srid=4326)
+
+        if kwargs.get('publish_hdf'):
+            # Generate Quicklook and append asset
+            assets['asset'] = create_asset_definition(
+                href=_item_prefix(Path(file)),
+                mime_type=guess_mime_type(file),
+                role=['data'],
+                absolute_path=str(file)
+            )
+
+            file_band_map = item_result.files
+        else:
+            for _band, _geotiff in item_result.files.items():
+                destination_path = destination / Path(_geotiff).name
+                shutil.move(str(_geotiff), str(destination_path))
+                files[_band] = destination_path
 
         file = destination
         cloud_cover = item_result.cloud_cover
 
@@ -263,7 +308,7 @@ def publish_collection(scene_id: str, data: BaseCollection, collection: Collecti
     extra_assets = data.get_assets(collection, path=file)
 
     tile = Tile.query().filter(
-        Tile.name == data.parser.tile_id(),
+        Tile.name == tile_id,
         Tile.grid_ref_sys_id == collection.grid_ref_sys_id
     ).first()
 
@@ -291,7 +336,7 @@ def publish_collection(scene_id: str, data: BaseCollection, collection: Collecti
                 if convex_hull.area > 0.0:
                     convex_hull = from_shape(convex_hull, srid=4326)
 
-            assets[band.name] = _asset_definition(path, band, is_raster, cog=True)
+            assets[band.name] = _asset_definition(path, band, is_raster, cog=True, item_prefix=asset_item_prefix, prefix=prefix)
 
             break
 
@@ -304,14 +349,14 @@ def publish_collection(scene_id: str, data: BaseCollection, collection: Collecti
         if is_raster:
             compress_raster(str(asset_file_path), str(asset_file_path))
 
-        assets[asset_name] = _asset_definition(asset_file_path, is_raster=is_raster, cog=False)
+        assets[asset_name] = _asset_definition(asset_file_path, is_raster=is_raster, cog=False, item_prefix=asset_item_prefix, prefix=prefix)
 
     index_bands = generate_band_indexes(scene_id, collection, file_band_map)
 
     for band_name, band_file in index_bands.items():
         path = Path(band_file)
 
-        assets[band_name] = _asset_definition(path, collection_band_map[band_name], is_raster=True, cog=True)
+        assets[band_name] = _asset_definition(path, collection_band_map[band_name], is_raster=True, cog=True, item_prefix=asset_item_prefix, prefix=prefix)
 
     # TODO: Remove un-necessary files
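Two behaviours hang off the new publish_hdf keyword in the hunks above: when it is set, the original HDF file is registered as the item asset and the extracted bands only feed file_band_map; when unset, the extracted GeoTIFFs are published under CUBES_DATA_DIR and the MODIS tile name is rewritten into the numeric form matched against Tile.name. A small sketch of that rewrite, with illustrative tile names:

    # MODIS sinusoidal tiles are named like 'h13v10'; replacing the 'h' and
    # 'v' markers with zeros yields the numeric grid name, e.g. '013010'.
    def normalize_modis_tile(tile_id: str) -> str:
        return tile_id.replace('h', '0').replace('v', '0')

    assert normalize_modis_tile('h13v10') == '013010'
    assert normalize_modis_tile('h01v09') == '001009'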
@@ -328,7 +373,7 @@ def publish_collection(scene_id: str, data: BaseCollection, collection: Collecti
 
     create_quick_look(str(quicklook), red_file, green_file, blue_file)
 
-    relative_quicklook = _item_prefix(quicklook)
+    relative_quicklook = _item_prefix(quicklook, item_prefix=asset_item_prefix, prefix=prefix)
 
     assets['thumbnail'] = create_asset_definition(
         href=relative_quicklook,
diff --git a/bdc_collection_builder/celery/tasks.py b/bdc_collection_builder/celery/tasks.py
index 30d5f61..f3d5941 100644
--- a/bdc_collection_builder/celery/tasks.py
+++ b/bdc_collection_builder/celery/tasks.py
@@ -70,7 +70,7 @@ def create_execution(activity):
         model.start = datetime.utcnow()
 
     # Ensure that args values is always updated
-    copy_args = dict(**model.activity.args)
+    copy_args = dict(**model.activity.args or {})
     copy_args.update(activity['args'] or dict())
 
     model.activity.args = copy_args
 
@@ -336,11 +336,14 @@ def publish(activity: dict, collection_id=None, **kwargs):
 
         refresh_execution_args(execution, activity, file=str(file))
 
+    options = activity['args']
+    options.update(**kwargs)
+
     provider_id = activity['args'].get('provider_id')
 
     publish_collection(scene_id, data_collection, collection, file,
                        cloud_cover=activity['args'].get('cloud'),
-                       provider_id=provider_id)
+                       provider_id=provider_id, publish_hdf=options.get('publish_hdf'))
 
     if file:
         refresh_execution_args(execution, activity, file=str(file))
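The create_execution change above guards against activity rows whose args column is still NULL: unpacking None with ** raises TypeError, while falling back to an empty mapping keeps the merge working (or binds before the ** unpacking, so dict(**model.activity.args or {}) parses as dict(**(model.activity.args or {}))). A minimal sketch of the failure mode:

    stored_args = None  # e.g. an activity persisted without args

    try:
        dict(**stored_args)  # old code path
    except TypeError:
        pass  # dict() argument after ** must be a mapping, not None

    copy_args = dict(**(stored_args or {}))  # new code path, parenthesized
    incoming_args = {'cloud': 90}            # hypothetical activity['args']
    copy_args.update(incoming_args or dict())
    assert copy_args == {'cloud': 90}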
diff --git a/bdc_collection_builder/config.py b/bdc_collection_builder/config.py
index 969e695..73f0448 100644
--- a/bdc_collection_builder/config.py
+++ b/bdc_collection_builder/config.py
@@ -63,6 +63,8 @@ class Config:
     REDIS_URL = os.environ.get('REDIS_URL', 'redis://localhost:6379')
     RABBIT_MQ_URL = os.environ.get('RABBIT_MQ_URL', 'pyamqp://guest@localhost')
     DATA_DIR = os.environ.get('DATA_DIR', tempfile.gettempdir())
+    CUBES_DATA_DIR = os.environ.get('CUBES_DATA_DIR', tempfile.gettempdir())
+    CUBES_ITEM_PREFIX = os.environ.get('CUBES_ITEM_PREFIX', '/data/d006')
 
     TASK_RETRY_DELAY = int(os.environ.get('TASK_RETRY_DELAY', 60 * 60))  # a hour
diff --git a/bdc_collection_builder/version.py b/bdc_collection_builder/version.py
index 8e3673b..7ed2759 100644
--- a/bdc_collection_builder/version.py
+++ b/bdc_collection_builder/version.py
@@ -13,4 +13,4 @@
 """
 
 
-__version__ = '0.6.1'
+__version__ = '0.8.0'
diff --git a/docker/Dockerfile b/docker/Dockerfile
index fc07f93..35dbfdd 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -10,6 +10,9 @@ WORKDIR /app
 RUN apt-get update && \
     apt-get install --yes nano && \
     pip3 install pip --upgrade && \
-    rm -rf /var/lib/apt/lists/* && \
-    pip install wheel && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip install wheel && \
+    pip install gdal==2.4.0 && \
+    pip install pymodis && \
     pip install -e .[all]
diff --git a/docker/Dockerfile.atm b/docker/Dockerfile.atm
index fe0e78d..95af1db 100644
--- a/docker/Dockerfile.atm
+++ b/docker/Dockerfile.atm
@@ -5,7 +5,7 @@ ENV LANG C.UTF-8
 
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install --yes \
-        apt-transport-https ca-certificates curl gnupg-agent software-properties-common
+        apt-transport-https ca-certificates curl gnupg-agent software-properties-common git
 
 RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - && \
     add-apt-repository \
@@ -25,7 +25,6 @@ ADD . /app
 
 WORKDIR /app
 
-RUN pip3 install pip --upgrade && \
-    pip install --upgrade setuptools && \
-    pip install wheel && \
-    pip install -e .[all]
+RUN pip3 install -U pip && \
+    pip3 install wheel && \
+    pip3 install -e .
\ No newline at end of file
diff --git a/docs/sphinx/conf.py b/docs/sphinx/conf.py
index 063e860..1cec241 100644
--- a/docs/sphinx/conf.py
+++ b/docs/sphinx/conf.py
@@ -61,7 +61,6 @@
 html_theme = 'sphinx_rtd_theme'
 
 html_theme_options = {
-    'html_baseurl': 'https://brazil-data-cube.github.io/',
     'analytics_id': 'XXXXXXXXXX',
     'logo_only': False,
     'display_version': True,
diff --git a/setup.py b/setup.py
index 434bd0a..b6d21db 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@
     'harmonization': harmonization_require,
     'gdal': [
         'GDAL>=2.3',
-        'bdc-collectors @ git+git://github.com/brazil-data-cube/bdc-collectors.git@v0.2.1#egg=bdc-collectors[modis]',
+        'bdc-collectors @ git+git://github.com/brazil-data-cube/bdc-collectors.git@v0.4.0#egg=bdc-collectors[modis]',
     ]
 }
 
@@ -63,8 +63,8 @@
     'numpy>=1.17.2',
     'numpngw>=0.0.8',
     'scikit-image>=0.16.2',
-    'SQLAlchemy[postgresql_psycopg2binary]>=1.3,<1.4',
-    'bdc-collectors @ git+git://github.com/brazil-data-cube/bdc-collectors.git@master#egg=bdc-collectors',
+    'SQLAlchemy[postgresql_psycopg2binary]>=1.3,<2',
+    'bdc-collectors @ git+git://github.com/brazil-data-cube/bdc-collectors.git@v0.4.0#egg=bdc-collectors',
     'celery[librabbitmq]>=4.3,<4.4.3',
     'Werkzeug>=0.16,<1.0',
     'shapely>=1.7,<2'
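The two settings added to config.py above follow the same environment-first convention as the rest of Config, so deployments that do not export them keep working: CUBES_DATA_DIR falls back to a temporary directory, just like DATA_DIR, and CUBES_ITEM_PREFIX falls back to /data/d006. A quick sketch of the resolution order, with a hypothetical exported value:

    import os
    import tempfile

    # Unset: the default applies, mirroring DATA_DIR's behaviour.
    os.environ.pop('CUBES_DATA_DIR', None)
    assert os.environ.get('CUBES_DATA_DIR', tempfile.gettempdir()) == tempfile.gettempdir()

    # Exported by the deployment: the environment value wins.
    os.environ['CUBES_DATA_DIR'] = '/gfs/cubes'
    assert os.environ.get('CUBES_DATA_DIR', tempfile.gettempdir()) == '/gfs/cubes'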