Merge pull request #230 from raphaelrpl/b-0.8
Add support to publish HDF files (modis) (close #221)
raphaelrpl authored Apr 26, 2021
2 parents eab4a3f + c4c096e commit b1deaf9
Showing 4 changed files with 124 additions and 2 deletions.
17 changes: 16 additions & 1 deletion bdc_collection_builder/celery/publish.py
@@ -243,7 +243,22 @@ def publish_collection(scene_id: str, data: BaseCollection, collection: Collecti
     else:
         destination.mkdir(parents=True, exist_ok=True)
 
-    files = data.get_files(collection, path=file)
+    if file.endswith('.hdf'):
+        from ..collections.hdf import to_geotiff
+
+        destination.mkdir(parents=True, exist_ok=True)
+        item_result = to_geotiff(file, temporary_dir.name)
+        files = dict()
+
+        for _band, _geotiff in item_result.files.items():
+            destination_path = destination / Path(_geotiff).name
+            shutil.move(str(_geotiff), str(destination_path))
+            files[_band] = destination_path
+
+        file = destination
+        cloud_cover = item_result.cloud_cover
+    else:
+        files = data.get_files(collection, path=file)
 
     extra_assets = data.get_assets(collection, path=file)

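For context, the new branch converts a MODIS '.hdf' scene into per-band GeoTIFF files in a temporary directory and then moves each band into the item destination. A minimal standalone sketch of that flow, assuming the package is importable and GDAL can read the scene (the paths below are hypothetical):

import shutil
import tempfile
from pathlib import Path

from bdc_collection_builder.collections.hdf import to_geotiff

scene = '/data/MOD13Q1.A2021097.h13v10.006.hdf'  # hypothetical input scene
destination = Path('/data/published/MOD13Q1')    # hypothetical output folder
destination.mkdir(parents=True, exist_ok=True)

with tempfile.TemporaryDirectory() as tmp:
    item = to_geotiff(scene, tmp)  # ItemResult: band name -> GeoTIFF path
    files = {}
    for band, geotiff in item.files.items():
        target = destination / Path(geotiff).name
        shutil.move(str(geotiff), str(target))   # move band file into place
        files[band] = target

print(files, item.cloud_cover)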
99 changes: 99 additions & 0 deletions bdc_collection_builder/collections/hdf.py
@@ -0,0 +1,99 @@
#
# This file is part of Brazil Data Cube Collection Builder.
# Copyright (C) 2019-2020 INPE.
#
# Brazil Data Cube Collection Builder is free software; you can redistribute it and/or modify it
# under the terms of the MIT License; see LICENSE file for more details.
#

"""Module to deal with Hierarchical Data Format (HDF4/HDF5)."""

from pathlib import Path
from typing import NamedTuple

from osgeo import gdal

DTYPES = dict(
    uint8=gdal.GDT_Byte,
    int16=gdal.GDT_Int16,
    uint16=gdal.GDT_UInt16,
    int32=gdal.GDT_Int32,
    uint32=gdal.GDT_UInt32,
)

ItemResult = NamedTuple('ItemResult', [('files', dict), ('cloud_cover', float)])
"""Type to represent the extracted scenes from an Hierarchical Data Format (HDF4/HDF5)."""


def to_geotiff(hdf_path: str, destination: str) -> ItemResult:
    """Convert a Hierarchical Data Format (HDF4/HDF5) file to a set of GeoTIFF files.

    Args:
        hdf_path (str) - Path to the HDF file to be extracted.
        destination (str) - The destination folder.

    Note:
        The output GeoTIFF files are not Cloud Optimized GeoTIFF (COG).

    Tip:
        You may use the utility :meth:`bdc_collection_builder.collections.utils.generate_cogs`
        to generate Cloud Optimized GeoTIFF files.

    Raises:
        IOError: When the input file is not a valid data set.

    Returns:
        ItemResult: A struct containing the extracted files.
    """
    data_set = gdal.Open(hdf_path)

    if data_set is None:
        raise IOError(f'Could not open {str(hdf_path)}')

    base_name = Path(hdf_path).stem
    metadata = data_set.GetMetadata()
    cloud_cover = float(metadata.get('QAPERCENTCLOUDCOVER.1') or 0)
    output_path = Path(destination)

    geotiff_driver = gdal.GetDriverByName('GTiff')
    files = dict()
    # Band iterator index to retrieve metadata value
    band_idx = 1
    for data_set_name, _ in data_set.GetSubDatasets():
        formal_name = metadata[f'PARAMETERNAME.{band_idx}']
        band_name = '_'.join(formal_name.split(' ')[3:])

        # Open the subdataset itself (the parent handle is no longer needed:
        # its subdataset list was materialized before the loop started).
        data_set = gdal.Open(data_set_name)
        band = data_set.GetRasterBand(1)
        array = band.ReadAsArray()

        tiff_file = output_path / f'{base_name}_{band_name}.tif'

        output_data_set = geotiff_driver.Create(
            str(tiff_file),
            data_set.RasterXSize,
            data_set.RasterYSize,
            1,
            DTYPES[array.dtype.name]
        )
        output_data_set_band = output_data_set.GetRasterBand(1)
        output_data_set.SetGeoTransform(data_set.GetGeoTransform())
        output_data_set.SetProjection(data_set.GetProjection())
        output_data_set.SetMetadata(metadata)
        output_data_set_band.WriteArray(array)
        output_data_set_band.SetNoDataValue(0)

        files[band_name] = str(tiff_file)

        # Release the GDAL handles so the GeoTIFF is flushed to disk.
        output_data_set_band = None
        output_data_set = None

        band_idx += 1

    return ItemResult(files=files, cloud_cover=cloud_cover)


def is_valid(file_path: str) -> bool:
    """Check the HDF file integrity with the GDAL library."""
    ds = gdal.Open(file_path)

    return ds is not None
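The band naming in to_geotiff above derives each band name from a PARAMETERNAME metadata entry by dropping the leading resolution and composite-period words. A small worked example with a hypothetical MODIS-style value:

# Hypothetical MODIS-style PARAMETERNAME entry (e.g. from a MOD13 product).
formal_name = '250m 16 days NDVI'

# Drop the first three tokens ('250m', '16', 'days') and join the rest.
band_name = '_'.join(formal_name.split(' ')[3:])
print(band_name)  # -> NDVI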
4 changes: 4 additions & 0 deletions bdc_collection_builder/collections/utils.py
@@ -449,6 +449,10 @@ def is_valid_compressed_file(file_path: str) -> bool:
         return is_valid_tar(file_path)
     if file_path.endswith('.tar.gz'):
         return is_valid_tar_gz(file_path)
+    if file_path.endswith('.hdf'):
+        from .hdf import is_valid
+
+        return is_valid(file_path)
     return True
 
 
 def is_valid_tar(file_path: str) -> bool:
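With this branch, is_valid_compressed_file also covers HDF downloads: a '.hdf' path is validated by asking GDAL to open it, while archives keep their tarfile-based checks. A short sketch from the caller's side (the scene path is hypothetical):

from bdc_collection_builder.collections.utils import is_valid_compressed_file

# Hypothetical downloaded MODIS scene; '.hdf' now routes to the GDAL check.
scene = '/data/downloads/MOD13Q1.A2021097.h13v10.006.hdf'

if not is_valid_compressed_file(scene):
    raise IOError(f'Corrupted or unreadable download: {scene}')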
6 changes: 5 additions & 1 deletion setup.py
@@ -39,7 +39,11 @@
 extras_require = {
     'docs': docs_require,
     'tests': tests_require,
-    'harmonization': harmonization_require
+    'harmonization': harmonization_require,
+    'gdal': [
+        'GDAL>=2.3',
+        'bdc-collectors @ git+git://github.com/brazil-data-cube/[email protected]#egg=bdc-collectors[modis]',
+    ]
 }
 
 extras_require['all'] = [req for exts, reqs in extras_require.items() for req in reqs]
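For reference, the new optional dependency group can be installed from a source checkout with pip install -e .[gdal], which pulls in GDAL (>= 2.3) and bdc-collectors 0.2.3 with its modis extra.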
