ECMWFCode4Earth · tommylees112 · Aug 5, 2019 · Aug 5, 2019 · Aug 5, 2019 · Aug 5, 2019
diff --git a/scripts/drafts/icdc.py b/scripts/drafts/icdc.py
@@ -0,0 +1,30 @@
+"""Draft script: run the ICDC preprocessors for a few of the land datasets."""
+from src.preprocess.icdc import (
+    ESACCISoilMoisturePreprocessor,
+    LAIModisAvhrrPreprocessor,
+    ModisNDVIPreprocessor,
+)
+
+
+def modis_ndvi():
+    """Preprocess the MODIS NDVI data with the default arguments."""
+    processor = ModisNDVIPreprocessor()
+    processor.preprocess()
+
+
+def cci_soil_moisture():
+    """Preprocess the ESA CCI soil moisture data with the default arguments."""
+    processor = ESACCISoilMoisturePreprocessor()
+    processor.preprocess()
+
+
+def modis_lai():
+    """Preprocess the AVHRR/MODIS LAI data with the default arguments."""
+    processor = LAIModisAvhrrPreprocessor()
+    processor.preprocess()
+
+
+if __name__ == '__main__':
+    modis_ndvi()
+    cci_soil_moisture()
+    modis_lai()
diff --git a/scripts/preprocess.py b/scripts/preprocess.py
@@ -9,7 +9,6 @@
 
 from src.preprocess.admin_boundaries import KenyaAdminPreprocessor
 
-
 def process_vci_2018():
     # if the working directory is alread ml_drought don't need ../data
     if Path('.').absolute().as_posix().split('/')[-1] == 'ml_drought':
@@ -127,11 +126,28 @@ def preprocess_era5():
     processor.preprocess(subset_str='kenya', regrid=regrid_path)
 
 
+def preprocess_icdc():
+    """Preprocess the ICDC MODIS NDVI and LST data for 'ethiopia_safe'."""
+    from src.preprocess.icdc import (
+        ModisNDVIPreprocessor,
+        ModisLSTPreprocessor,
+    )
+    # if the working directory is already ml_drought don't need ../data
+    cwd_name = Path('.').absolute().as_posix().split('/')[-1]
+    data_path = Path('data') if cwd_name == 'ml_drought' else Path('../data')
+
+    # run the preprocessors in turn (NDVI first, then LST)
+    for preprocessor_cls in (ModisNDVIPreprocessor, ModisLSTPreprocessor):
+        processor = preprocessor_cls(data_path)
+        processor.preprocess(subset_str='ethiopia_safe')
+
+
 if __name__ == '__main__':
-    process_vci_2018()
-    process_precip_2018()
-    process_era5POS_2018()
-    process_gleam()
-    process_esa_cci_landcover()
-    preprocess_srtm()
-    preprocess_era5()
+    # process_vci_2018()
+    # process_precip_2018()
+    # process_era5POS_2018()
+    # process_gleam()
+    # process_esa_cci_landcover()
+    # preprocess_srtm()
+    # preprocess_era5()
+    preprocess_icdc()
diff --git a/src/preprocess/icdc.py b/src/preprocess/icdc.py
@@ -0,0 +1,277 @@
+from pathlib import Path
+import xarray as xr
+from shutil import rmtree
+from typing import Optional, List
+
+from .base import BasePreProcessor
+
+
+class ICDCPreprocessor(BasePreProcessor):
+    """Base class for preprocessing the datasets stored on ICDC
+    (SPECIFIC to Uni Server)
+
+    The data is read from /pool/data/ICDC/{source}/{dataset}/DATA.
+    Subclasses set `source` and `dataset` to select one ICDC dataset.
+    """
+    dataset: str  # the name of the dataset directory on icdc
+    source: str  # {'land', 'atmosphere', 'climate_indices', 'ocean', 'ice_and_snow'}
+
+    def __init__(self, data_folder: Path = Path('data')) -> None:
+        super().__init__(data_folder)
+        # all of the datasets for `source` are stored below this directory
+        self.icdc_data_dir = Path(f'/pool/data/ICDC/{self.source}/')
+
+    def get_icdc_filepaths(self) -> List[Path]:
+        """Collect the paths of all the .nc files of this dataset.
+
+        The following layouts are tried in order, and the files of the
+        first one which contains any .nc files are returned:
+        1. DATA/{subdirectory}/*.nc
+        2. DATA/*.nc
+        3. DATA/MONTHLY/**/*.nc (HACKY: for the lst dataset)
+
+        The list is empty if no .nc files are found at all.
+        """
+        data_dir = self.icdc_data_dir / self.dataset / 'DATA'
+
+        filepaths: List[Path] = []
+        for subdir in data_dir.iterdir():
+            if subdir.is_dir():
+                filepaths.extend(subdir.glob('*.nc'))
+
+        if not filepaths:
+            filepaths = list(data_dir.glob('*.nc'))
+
+        if not filepaths:
+            # HACKY: for the lst dataset
+            filepaths = list(data_dir.glob('MONTHLY/**/*.nc'))
+
+        return filepaths
+
+    @staticmethod
+    def create_filename(netcdf_filename: str,
+                        subset_name: Optional[str] = None) -> str:
+        """Create the filename of the preprocessed (interim) file.
+
+        The last three characters of `netcdf_filename` (expected to be
+        '.nc') are dropped to get the stem. The result is
+        {stem}_{subset_name}.nc, or {stem}.nc if `subset_name` is None.
+        """
+        filename_stem = netcdf_filename[:-3]
+        if subset_name is None:
+            return f'{filename_stem}.nc'
+        return f'{filename_stem}_{subset_name}.nc'
+
+    def _preprocess_single(self, netcdf_filepath: Path,
+                           subset_str: Optional[str] = 'kenya',
+                           regrid: Optional[xr.Dataset] = None) -> None:
+        """Run the Preprocessing steps for the data stored on ICDC
+        https://icdc.cen.uni-hamburg.de/1/daten.html
+
+        Process:
+        -------
+        * chop out ROI
+        * create new dataset with regrid dimensions
+        * Save the output file to new folder
+
+        Arguments
+        ----------
+        netcdf_filepath: Path
+            The .nc file to preprocess
+        subset_str: Optional[str] = 'kenya'
+            The name of the region to chop out. If None, no subsetting happens
+        regrid: Optional[xr.Dataset] = None
+            The dataset whose grid the data is regridded to.
+            If None, no regridding happens
+        """
+        # check the filename first so that we fail before doing any work
+        assert netcdf_filepath.name[-3:] == '.nc', \
+            f'filepath name should be a .nc file. Currently: {netcdf_filepath.name}'
+
+        print(f'Starting work on {netcdf_filepath.name}')
+        # 1. read in the dataset (closed again on leaving the with block)
+        with xr.open_dataset(netcdf_filepath) as raw_ds:
+            ds = raw_ds
+
+            # 2. chop out the region of interest
+            if subset_str is not None:
+                try:
+                    ds = self.chop_roi(ds, subset_str, inverse_lat=True)
+                except AssertionError:
+                    # NOTE: presumably raised when the latitude ordering is
+                    # the other way around - TODO confirm in chop_roi
+                    ds = self.chop_roi(ds, subset_str, inverse_lat=False)
+
+            # 3. regrid to the reference grid
+            if regrid is not None:
+                ds = self.regrid(ds, regrid)
+
+            # 4. create the filepath and save to that location. This has
+            # to happen before the input file is closed
+            filename = self.create_filename(
+                netcdf_filepath.name, subset_name=subset_str
+            )
+            print(f"Saving to {self.interim}/{filename}")
+            ds.to_netcdf(self.interim / filename)
+
+        print(f"** Done for {self.dataset} {netcdf_filepath.name} **")
+
+    def preprocess(self, subset_str: Optional[str] = 'kenya',
+                   regrid: Optional[Path] = None,
+                   resample_time: Optional[str] = 'M',
+                   upsampling: bool = False,
+                   cleanup: bool = False) -> None:
+        """ Preprocess all of the ICDC .nc files of this dataset to
+        produce one subset file.
+
+        Arguments
+        ----------
+        subset_str: Optional[str] = 'kenya'
+            The name of the region to subset when preprocessing.
+            If None, no subsetting happens
+        regrid: Optional[Path] = None
+            If a Path is passed, the ICDC files will be regridded to have the same
+            grid as the dataset at that Path. If None, no regridding happens
+        resample_time: str = 'M'
+            If not None, defines the time length to which the data will be resampled
+        upsampling: bool = False
+            If true, tells the class the time-sampling will be upsampling. In this case,
+            nearest instead of mean is used for the resampling
+        cleanup: bool = False
+            If true, delete interim files created by the class
+        """
+        nc_files = self.get_icdc_filepaths()
+
+        # load the reference grid once and reuse it for all of the files
+        reference_grid: Optional[xr.Dataset] = None
+        if regrid is not None:
+            reference_grid = self.load_reference_grid(regrid)
+
+        for nc_file in nc_files:
+            self._preprocess_single(nc_file, subset_str, reference_grid)
+
+        # merge all of the timesteps
+        self.merge_files(subset_str, resample_time, upsampling)
+
+        if cleanup:
+            rmtree(self.interim)
+
+
+class ESACCISoilMoisturePreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'esa_cci_soilmoisture'  # directory inside icdc_data_dir
+
+
+class LAIModisAvhrrPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'avhrr_modis_lai'  # directory inside icdc_data_dir
+
+
+class ModisNDVIPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'modis_aqua_vegetationindex'  # directory inside icdc_data_dir
+
+
+class AMSRESoilMoisturePreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'amsre_soilmoisture'  # directory inside icdc_data_dir
+
+
+class ASCATSoilMoisturePreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'ascat_soilmoisture'  # directory inside icdc_data_dir
+
+
+class EUMetsatAlbedoPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'eumetsat_albedo'  # directory inside icdc_data_dir
+
+
+class EUMetSatAlbedo2Preprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'eumetsat_clara2_surfacealbedo'  # directory inside icdc_data_dir
+
+
+class EUMetSatRadiationPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'eumetsat_clara2_surfaceradiation'  # directory inside icdc_data_dir
+
+
+class EUMetSatIrradiancePreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'eumetsat_surfacesolarirradiance'  # directory inside icdc_data_dir
+
+
+class SpotFAPARPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'fapar_spot_proba_v'  # directory inside icdc_data_dir
+
+
+class GLEAMEvaporationPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'gleam_evaporation'  # directory inside icdc_data_dir
+
+
+class SpotLaiPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'lai_spot_proba_v'  # directory inside icdc_data_dir
+
+
+class SpotLSAlbedoPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'land_surface_albedo_spot'  # directory inside icdc_data_dir
+
+
+class ModisAlbedoPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'modis_albedo'  # directory inside icdc_data_dir
+
+
+class ModisForestCoverPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'modis_forestcoverfraction'  # directory inside icdc_data_dir
+
+
+class ModisLandcoverPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'modis_landcover'  # directory inside icdc_data_dir
+
+
+class ModisLatLonPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'modis_latlon'  # directory inside icdc_data_dir
+
+
+class ModisLSTClimatologyPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'modis_lst_climatology'  # directory inside icdc_data_dir
+
+
+class ModisNPPPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'modis_primary_production'  # directory inside icdc_data_dir
+
+
+class ModisSRTMPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'modis-srtm_landwaterdistribution'  # directory inside icdc_data_dir
+
+
+class ModisLSTPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'modis_terra_landsurfacetemperature'  # directory inside icdc_data_dir
+
+
+class SMOSSoilMoisturePreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'smos_soilmoisture'  # directory inside icdc_data_dir
+
+
+class TopographyPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'topography'  # directory inside icdc_data_dir
+
+
+class SpotVegetationCoverFractionPreprocessor(ICDCPreprocessor):
+    source = 'land'  # sub-directory of /pool/data/ICDC/
+    dataset = 'vegetationcoverfraction_spot_proba_v'  # directory inside icdc_data_dir
diff --git a/src/utils.py b/src/utils.py
@@ -26,8 +26,17 @@ def get_kenya() -> Region:
 
 
 def get_ethiopia() -> Region:
-    return Region(name='ethiopia', lonmin=32.9975838, lonmax=47.9823797,
-                  latmin=3.397448, latmax=14.8940537)
+    return Region(  # the name matches the 'ethiopia' key in region_lookup
+        name='ethiopia', lonmin=32.9975838, lonmax=47.9823797,
+        latmin=3.397448, latmax=14.8940537
+    )
+
+
+def get_ethiopia_safe() -> Region:
+    return Region(  # whole-degree box which fully contains get_ethiopia()
+        name='ethiopia_safe', lonmin=30, lonmax=50,
+        latmin=2, latmax=15
+    )
 
 
 def get_east_africa() -> Region:
@@ -181,5 +190,6 @@ def drop_nans_and_flatten(dataArray: xr.DataArray) -> np.ndarray:
 region_lookup = {
     "kenya": get_kenya(),
     "ethiopia": get_ethiopia(),
+    "ethiopia_safe": get_ethiopia_safe(),  # wider box around ethiopia
     "east_africa": get_east_africa(),
 }