From 426fd9af321a416cf5724c3e7b311115517c907f Mon Sep 17 00:00:00 2001 From: tommylees112 Date: Fri, 24 Jan 2020 01:01:09 +0000 Subject: [PATCH 1/7] update the fewsnet preprocessors --- src/exporters/fewsnet_shapefiles.py | 58 +++++++++ src/preprocess/base.py | 2 +- src/preprocess/fewsnet_shapefiles.py | 176 +++++++++++++++++++++++++++ src/preprocess/utils.py | 101 +++++++++------ 4 files changed, 298 insertions(+), 39 deletions(-) create mode 100644 src/exporters/fewsnet_shapefiles.py create mode 100644 src/preprocess/fewsnet_shapefiles.py diff --git a/src/exporters/fewsnet_shapefiles.py b/src/exporters/fewsnet_shapefiles.py new file mode 100644 index 000000000..bf6d27267 --- /dev/null +++ b/src/exporters/fewsnet_shapefiles.py @@ -0,0 +1,58 @@ +from pathlib import Path +import os +from typing import List + +from .base import BaseExporter + + +class FEWSNetExporter(BaseExporter): + """Export FEWSNet data + + https://fews.net/ + + TODO: need to use Selenium to navigate this page? + https://fews.net/data + """ + + data_str: str + + def __init__(self, data_folder: Path = Path("data")) -> None: + super().__init__(data_folder) + # write the download to landcover + self.output_dir = self.raw_folder / "boundaries" / self.data_str + if not self.output_dir.exists(): + self.output_dir.mkdir(parents=True, exist_ok=True) + + def wget_file(self, url_path: str) -> None: + output_file = self.output_dir / url_path.split("/")[-1] + if output_file.exists(): + print(f"{output_file} already exists! Skipping") + return None + + os.system(f"wget {url_path} -P {self.output_dir.as_posix()}") + + def unzip(self, fname: Path) -> None: + print(f"Unzipping {fname.name}") + + os.system( + f"unzip {fname.as_posix()} -d {self.output_dir.resolve().as_posix()}" + ) + print(f"{fname.name} unzipped!") + + +class FEWSNetKenyaLivelihoodExporter(FEWSNetExporter): + data_str = "livelihood_zones" + url: List = "http://shapefiles.fews.net.s3.amazonaws.com/LHZ/FEWS_NET_LH_World.zip" + + def export(self) -> None: + """Export functionality for the FEWSNET Livelihood Zones as .shp files + """ + + fname = self.url.split("/")[-1] + # check if the file already exists + if (self.output_dir / fname).exists(): + print("Data already downloaded!") + + else: + self.wget_file(url_path=self.url) + self.unzip(fname=(self.output_dir / fname)) diff --git a/src/preprocess/base.py b/src/preprocess/base.py index b4379fb6a..e2f15ad0e 100644 --- a/src/preprocess/base.py +++ b/src/preprocess/base.py @@ -149,7 +149,7 @@ def regrid( savedir = self.preprocessed_folder / filename regridder = xesmf.Regridder( # type: ignore - ds, ds_out, method, filename=str(savedir), reuse_weights=False, + ds, ds_out, method, filename=str(savedir), reuse_weights=False ) variables = [v for v in ds.data_vars] diff --git a/src/preprocess/fewsnet_shapefiles.py b/src/preprocess/fewsnet_shapefiles.py new file mode 100644 index 000000000..848078ef1 --- /dev/null +++ b/src/preprocess/fewsnet_shapefiles.py @@ -0,0 +1,176 @@ +from pathlib import Path +import xarray as xr +from collections import namedtuple +from .base import BasePreProcessor +from .utils import SHPtoXarray + +from typing import Optional, Dict + +gpd = None +GeoDataFrame = None + + +class FEWSNetPreprocesser(BasePreProcessor): + """ Preprocesses the FEWSNetwork shapefile data + """ + + country_code_mapping: dict = { + "AF": "Afghanistan", + "AO": "Angola", + "BF": "Burkina Faso", + "BI": "Burundi", + "CF": "CAR", + "DJ": "Djibouti", + "TZ": "Tanzania", + "ZW": "Zimbabwe", + "ZM": "Zambia", + "YE": "Yemen", + "UG": 
"Uganda", + "TD": "Chad", + "SV": "El Salvador", + "SN": "Senegal", + "SO": "Somalia", + "SL": "Sierra Leone", + "SD": "Sudan", + "NI": "Nicaragua", + "NG": "Nigeria", + "ET": "Ethiopia", + "NE": "Niger", + "GN": "Guinea", + "MZ": "Mozambique", + "HN": "Honduras", + "MW": "Malawi", + "ML": "Mali", + "MR": "Mauritania", + "HT": "Haiti", + "MG": "Madagascar", + "LR": "Liberia", + "KE": "Kenya", + "LS": "Lesotho", + "CD": "DR Congo", + "SS": "South Sudan", + "RW": "Rwanda", + "NI": "Nicaragua", + "TJ": "Tajikistan", + "GT": "Guatemala", + } + + dataset: str + analysis = True + + def __init__(self, data_folder: Path = Path("data")): + super().__init__(data_folder) + + # try and import geopandas + print("The FEWSNet preprocessor requires the geopandas package") + global gpd + if gpd is None: + import geopandas as gpd + global GeoDataFrame + if GeoDataFrame is None: + from geopandas.geodataframe import GeoDataFrame + + def get_filename(self, var_name: str, country: str) -> str: # type: ignore + new_filename = f"{var_name}_{country}.nc" + return new_filename + + +class FEWSNetLivelihoodPreprocessor(FEWSNetPreprocesser): + dataset = "livelihood_zones" + + def __init__(self, data_folder: Path = Path("data")) -> None: + super().__init__(data_folder) + self.base_raw_dir = self.raw_folder / "boundaries" / self.dataset + + def _preprocess_single( + self, + shp_filepath: Path, + reference_nc_filepath: Path, + var_name: str, + lookup_colname: str, + save: bool = True, + ) -> Optional[xr.Dataset]: + """ Preprocess .shp admin boundary files into an `.nc` + file with the same shape as reference_nc_filepath. + + Will create categorical .nc file which will specify + which admin region each pixel is in. + + Arguments + ---------- + shp_filepath: Path + The path to the shapefile + + reference_nc_filepath: Path + The path to the netcdf file with the shape + (must have been run through Preprocessors prior to using) + + var_name: str + the name of the Variable in the xr.Dataset and the name + of the output filename - {var_name}_{self.country}.nc + + lookup_colname: str + the column name to lookup in the shapefile + (read in as geopandas.GeoDataFrame) + """ + assert "interim" in reference_nc_filepath.parts, ( + "Expected " "the target data to have been preprocessed by the pipeline" + ) + + # MUST have a target dataset to create the same shape + target_ds = xr.ones_like(xr.open_dataset(reference_nc_filepath)) + data_var = [d for d in target_ds.data_vars][0] + da = target_ds[data_var] + + # turn the shapefile into a categorical variable (like landcover) + gdf = gpd.read_file(shp_filepath) + shp_to_nc = SHPtoXarray() + + for country_code in gdf.COUNTRY.unique(): + gdf_country = gdf.loc[gdf.COUNTRY == country_code] + + # create a unique filename for each country + country_str = ( + self.country_code_mapping[country_code].lower().replace(" ", "_") + ) + filename = self.get_filename(var_name, country_str) + if (self.out_dir / filename).exists(): + print( + "** Data already preprocessed! 
**\nIf you need to " + "process again then move or delete existing file" + f" at: {(self.out_dir / filename).as_posix()}" + ) + continue + + ds = shp_to_nc._to_xarray( + da=da, gdf=gdf_country, var_name=var_name, lookup_colname=lookup_colname + ) + + # save the data + print(f"Saving to {self.out_dir}") + + if self.analysis is True: + assert self.out_dir.parts[-2] == "analysis", ( + "self.analysis should" + "be True and the output directory should be analysis" + ) + + ds.to_netcdf(self.out_dir / filename) + # save key info columns + gdf_country[ + ["OBJECTID", "FNID", "LZNUM", "LZCODE", "LZNAMEEN", "CLASS"] + ].to_csv(self.out_dir / f"{country_str}_lookup_dict.csv") + + print( + f"** {(self.out_dir / filename).as_posix()} and lookup_dict saved! **" + ) + + def preprocess(self, reference_nc_filepath: Path) -> None: + """Preprocess FEWSNet Livelihood Zone shapefiles into xarray objects + """ + self._preprocess_single( + shp_filepath=self.base_raw_dir / "FEWS_NET_LH_World.shp", + lookup_colname="LZCODE", + reference_nc_filepath=reference_nc_filepath, + var_name="livelihood_zone", + ) diff --git a/src/preprocess/utils.py b/src/preprocess/utils.py index 93083895a..c6fb9779b 100644 --- a/src/preprocess/utils.py +++ b/src/preprocess/utils.py @@ -10,6 +10,7 @@ Affine = None gpd = None Polygon = None +GeoDataFrame = None def select_bounding_box( @@ -98,8 +99,14 @@ def __init__(self): if Polygon is None: from shapely.geometry import Polygon + global GeoDataFrame + if GeoDataFrame is None: + from geopandas.geodataframe import GeoDataFrame + @staticmethod - def transform_from_latlon(lat: xr.DataArray, lon: xr.DataArray) -> Affine: # type: ignore + def transform_from_latlon( + lat: xr.DataArray, lon: xr.DataArray + ) -> Affine: # type: ignore """ input 1D array of lat / lon and output an Affine transformation """ lat = np.asarray(lat) @@ -137,52 +144,20 @@ def rasterize( return xr.Dataset({variable_name: (dims, raster)}, coords=spatial_coords) - def shapefile_to_xarray( + def _to_xarray( self, da: xr.DataArray, - shp_path: Path, + gdf: GeoDataFrame, var_name: str = "region", lookup_colname: Optional[str] = None, ) -> xr.Dataset: - """ Create a new coord for the da indicating whether or not it - is inside the shapefile - - Creates a new coord - "var_name" which will have integer values - used to subset da for plotting / analysis - - Arguments: - --------- - :da: xr.DataArray - the `DataArray` with the shape that we want to rasterize the - shapefile onto. - - :shp_path: Path - the path to the .shp file to be converted into a categorical - xr.Dataset. - - :var_name: str = 'region' - the variable name in the new output Dataset - - :lookup_colname: Optional[str] = None - the column that defines the `values` in the lookup - dictionary when defining the (e.g. Region names) - - e.g. 'DISTNAME' in this shapefile below - DISTID DISTNAME geometry - 0 101.0 NAIROBI POLYGON ((36. -1 ... - 1 201.0 KIAMBU POLYGON ((36. -0.7 ... - + """ Returns: ------- :xr.Dataset Dataset with metadata associated with the areas in the shapefile. Stored as `ds.attrs['keys']` & `ds.attrs['values']` - - TODO: add a add_all_cols_as_attrs() function """ - # 1. read in shapefile - gdf = gpd.read_file(shp_path) # type: ignore - # allow the user to see the column headers if lookup_colname is None: print("lookup_colname MUST be provided (see error message below)") @@ -199,8 +174,8 @@ def shapefile_to_xarray( # 3. 
create a new variable set to the id in `shapes` (same shape as da) ds = self.rasterize(shapes=shapes, coords=da.coords, variable_name=var_name) - values = [value for value in gdf[lookup_colname].to_list()] - keys = [str(key) for key in gdf.index.to_list()] + values = [value for value in gdf[lookup_colname].tolist()] + keys = [str(key) for key in gdf.index.tolist()] data_vals = ds[[d for d in ds.data_vars][0]].values unique_values = np.unique(data_vals[~np.isnan(data_vals)]) unique_values = [str(int(v)) for v in unique_values] @@ -226,3 +201,53 @@ def shapefile_to_xarray( print("Are you certain the subset or shapefile are the correct region?") return ds + + def shapefile_to_xarray( + self, + da: xr.DataArray, + shp_path: Path, + var_name: str = "region", + lookup_colname: Optional[str] = None, + ) -> xr.Dataset: + """ Create a new coord for the da indicating whether or not it + is inside the shapefile + + Creates a new coord - "var_name" which will have integer values + used to subset da for plotting / analysis + + Arguments: + --------- + :da: xr.DataArray + the `DataArray` with the shape that we want to rasterize the + shapefile onto. + + :shp_path: Path + the path to the .shp file to be converted into a categorical + xr.Dataset. + + :var_name: str = 'region' + the variable name in the new output Dataset + + :lookup_colname: Optional[str] = None + the column that defines the `values` in the lookup + dictionary when defining the (e.g. Region names) + + e.g. 'DISTNAME' in this shapefile below + DISTID DISTNAME geometry + 0 101.0 NAIROBI POLYGON ((36. -1 ... + 1 201.0 KIAMBU POLYGON ((36. -0.7 ... + + Returns: + ------- + :xr.Dataset + Dataset with metadata associated with the areas in the shapefile. + Stored as `ds.attrs['keys']` & `ds.attrs['values']` + + TODO: add a add_all_cols_as_attrs() function + """ + # 1. 
read in shapefile + gdf = gpd.read_file(shp_path) # type: ignore + + return self._to_xarray( + da=da, gdf=gdf, var_name=var_name, lookup_colname=lookup_colname + ) From dc9b8af4ab74a6acda7b18fc6b38075e0cf9fab8 Mon Sep 17 00:00:00 2001 From: tommylees112 Date: Thu, 6 Feb 2020 00:18:01 +0000 Subject: [PATCH 2/7] update to get fewsnet working --- scripts/export.py | 11 ++++++++- scripts/preprocess.py | 35 +++++++++++++++++++--------- src/exporters/__init__.py | 2 ++ src/preprocess/__init__.py | 2 ++ src/preprocess/fewsnet_shapefiles.py | 25 +++++++++++++++++--- 5 files changed, 60 insertions(+), 15 deletions(-) diff --git a/scripts/export.py b/scripts/export.py index 6a239e3dc..ff7e1696a 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -14,6 +14,8 @@ ERA5LandExporter, ) +from src.exporters import FEWSNetKenyaLivelihoodExporter + from scripts.utils import get_data_path @@ -159,8 +161,14 @@ def export_kenya_boundaries(): exporter.export() +def export_fewsnet_shapefiles(): + data_path = get_data_path() + exporter = FEWSNetKenyaLivelihoodExporter(data_path) + exporter.export() + + if __name__ == "__main__": - export_era5_land() + # export_era5_land() # export_era5() # export_vhi() # export_chirps() @@ -169,3 +177,4 @@ def export_kenya_boundaries(): # export_esa() # export_s5() # export_kenya_boundaries() + export_fewsnet_shapefiles() \ No newline at end of file diff --git a/scripts/preprocess.py b/scripts/preprocess.py index b613fe0f5..f614e68f9 100644 --- a/scripts/preprocess.py +++ b/scripts/preprocess.py @@ -1,6 +1,7 @@ import sys sys.path.append("..") + from src.preprocess import ( VHIPreprocessor, CHIRPSPreprocessor, @@ -11,6 +12,7 @@ SRTMPreprocessor, ERA5MonthlyMeanPreprocessor, KenyaASALMask, + FEWSNetLivelihoodPreprocessor ) from src.preprocess.admin_boundaries import KenyaAdminPreprocessor @@ -137,15 +139,26 @@ def preprocess_era5(): processor.preprocess(subset_str="kenya", regrid=regrid_path) +def preprocess_livelihood_zones(): + data_path = get_data_path() + + regrid_path = data_path / "interim/chirps_preprocessed/data_kenya.nc" + assert regrid_path.exists(), f"{regrid_path} not available" + + processor = FEWSNetLivelihoodPreprocessor(data_path) + processor.preprocess(reference_nc_filepath=regrid_path, country_str='kenya') + + if __name__ == "__main__": - process_vci_2018() - process_precip_2018() - process_era5POS_2018() - process_gleam() - process_esa_cci_landcover() - preprocess_srtm() - preprocess_era5() - preprocess_kenya_boundaries(selection="level_1") - preprocess_kenya_boundaries(selection="level_2") - preprocess_kenya_boundaries(selection="level_3") - preprocess_asal_mask() + # process_vci_2018() + # process_precip_2018() + # process_era5POS_2018() + # process_gleam() + # process_esa_cci_landcover() + # preprocess_srtm() + # preprocess_era5() + # preprocess_kenya_boundaries(selection="level_1") + # preprocess_kenya_boundaries(selection="level_2") + # preprocess_kenya_boundaries(selection="level_3") + # preprocess_asal_mask() + preprocess_livelihood_zones() diff --git a/src/exporters/__init__.py b/src/exporters/__init__.py index 8a93ff95e..2cd0ea603 100644 --- a/src/exporters/__init__.py +++ b/src/exporters/__init__.py @@ -8,6 +8,7 @@ from .srtm import SRTMExporter from .esa_cci import ESACCIExporter from .admin_boundaries import KenyaAdminExporter +from .fewsnet_shapefiles import FEWSNetKenyaLivelihoodExporter __all__ = [ "ERA5Exporter", @@ -20,4 +21,5 @@ "ESACCIExporter", "ERA5LandExporter", "KenyaAdminExporter", + "FEWSNetKenyaLivelihoodExporter", ] diff --git 
a/src/preprocess/__init__.py b/src/preprocess/__init__.py index 77d915dff..35138bbf7 100644 --- a/src/preprocess/__init__.py +++ b/src/preprocess/__init__.py @@ -7,6 +7,7 @@ from .esa_cci import ESACCIPreprocessor from .srtm import SRTMPreprocessor from .admin_boundaries import KenyaAdminPreprocessor, KenyaASALMask +from .fewsnet_shapefiles import FEWSNetLivelihoodPreprocessor __all__ = [ "VHIPreprocessor", @@ -19,4 +20,5 @@ "SRTMPreprocessor", "KenyaAdminPreprocessor", "KenyaASALMask", + "FEWSNetLivelihoodPreprocessor" ] diff --git a/src/preprocess/fewsnet_shapefiles.py b/src/preprocess/fewsnet_shapefiles.py index 848078ef1..ed9f6b844 100644 --- a/src/preprocess/fewsnet_shapefiles.py +++ b/src/preprocess/fewsnet_shapefiles.py @@ -14,7 +14,7 @@ class FEWSNetPreprocesser(BasePreProcessor): """ Preprocesses the FEWSNetwork shapefile data """ - country_code_mapping: dict = { + country_code_mapping: Dict = { "AF": "Afghanistan", "AO": "Angola", "BF": "Burkina Faso", @@ -89,6 +89,7 @@ def _preprocess_single( var_name: str, lookup_colname: str, save: bool = True, + country_str: Optional[str] = None, ) -> Optional[xr.Dataset]: """ Preprocess .shp admin boundary files into an `.nc` file with the same shape as reference_nc_filepath. @@ -112,6 +113,10 @@ def _preprocess_single( lookup_colname: str the column name to lookup in the shapefile (read in as geopandas.GeoDataFrame) + + country_str: Optional[str] = None + the country you want to preprocess + """ assert "interim" in reference_nc_filepath.parts, ( "Expected " "the target data to have been preprocessed by the pipeline" @@ -126,7 +131,20 @@ def _preprocess_single( gdf = gpd.read_file(shp_filepath) shp_to_nc = SHPtoXarray() - for country_code in gdf.COUNTRY.unique(): + # if supply a country_str then only create .nc file for that country + country_lookup = dict(zip(self.country_code_mapping.values(), self.country_code_mapping.keys())) + if country_str is not None: + if country_str.capitalize() not in country_lookup.keys(): + assert False, ( + f"Expecting to have one of: \n{country_lookup.keys()}" + f"\nYou supplied: {country_str.capitalize()}" + "\nDoes this definitely exist?" + ) + country_code_list = [country_lookup[country_str.capitalize()]] + else: + country_code_list = gdf.COUNTRY.unique() + + for country_code in country_code_list: gdf_country = gdf.loc[gdf.COUNTRY == country_code] # create a unique filename for each country @@ -165,7 +183,7 @@ def _preprocess_single( f"** {(self.out_dir / filename).as_posix()} and lookup_dict saved! 
**" ) - def preprocess(self, reference_nc_filepath: Path) -> None: + def preprocess(self, reference_nc_filepath: Path, country_str: Optional[str] = None) -> None: """Preprocess FEWSNet Livelihood Zone shapefiles into xarray objects """ self._preprocess_single( @@ -173,4 +191,5 @@ def preprocess(self, reference_nc_filepath: Path) -> None: lookup_colname="LZCODE", reference_nc_filepath=reference_nc_filepath, var_name="livelihood_zone", + country_str=country_str ) From ac5b2820a53730ac35eba7128da8bd35779c0be0 Mon Sep 17 00:00:00 2001 From: tommylees112 Date: Thu, 6 Feb 2020 00:18:31 +0000 Subject: [PATCH 3/7] black code --- scripts/export.py | 2 +- scripts/preprocess.py | 4 ++-- src/exporters/fewsnet_shapefiles.py | 4 +--- src/preprocess/__init__.py | 2 +- src/preprocess/fewsnet_shapefiles.py | 10 +++++++--- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/scripts/export.py b/scripts/export.py index ff7e1696a..3a634202b 100644 --- a/scripts/export.py +++ b/scripts/export.py @@ -177,4 +177,4 @@ def export_fewsnet_shapefiles(): # export_esa() # export_s5() # export_kenya_boundaries() - export_fewsnet_shapefiles() \ No newline at end of file + export_fewsnet_shapefiles() diff --git a/scripts/preprocess.py b/scripts/preprocess.py index f614e68f9..9becc306f 100644 --- a/scripts/preprocess.py +++ b/scripts/preprocess.py @@ -12,7 +12,7 @@ SRTMPreprocessor, ERA5MonthlyMeanPreprocessor, KenyaASALMask, - FEWSNetLivelihoodPreprocessor + FEWSNetLivelihoodPreprocessor, ) from src.preprocess.admin_boundaries import KenyaAdminPreprocessor @@ -146,7 +146,7 @@ def preprocess_livelihood_zones(): assert regrid_path.exists(), f"{regrid_path} not available" processor = FEWSNetLivelihoodPreprocessor(data_path) - processor.preprocess(reference_nc_filepath=regrid_path, country_str='kenya') + processor.preprocess(reference_nc_filepath=regrid_path, country_str="kenya") if __name__ == "__main__": diff --git a/src/exporters/fewsnet_shapefiles.py b/src/exporters/fewsnet_shapefiles.py index bf6d27267..a1a74a344 100644 --- a/src/exporters/fewsnet_shapefiles.py +++ b/src/exporters/fewsnet_shapefiles.py @@ -34,9 +34,7 @@ def wget_file(self, url_path: str) -> None: def unzip(self, fname: Path) -> None: print(f"Unzipping {fname.name}") - os.system( - f"unzip {fname.as_posix()} -d {self.output_dir.resolve().as_posix()}" - ) + os.system(f"unzip {fname.as_posix()} -d {self.output_dir.resolve().as_posix()}") print(f"{fname.name} unzipped!") diff --git a/src/preprocess/__init__.py b/src/preprocess/__init__.py index 35138bbf7..a1769df6f 100644 --- a/src/preprocess/__init__.py +++ b/src/preprocess/__init__.py @@ -20,5 +20,5 @@ "SRTMPreprocessor", "KenyaAdminPreprocessor", "KenyaASALMask", - "FEWSNetLivelihoodPreprocessor" + "FEWSNetLivelihoodPreprocessor", ] diff --git a/src/preprocess/fewsnet_shapefiles.py b/src/preprocess/fewsnet_shapefiles.py index ed9f6b844..5bd903782 100644 --- a/src/preprocess/fewsnet_shapefiles.py +++ b/src/preprocess/fewsnet_shapefiles.py @@ -132,7 +132,9 @@ def _preprocess_single( shp_to_nc = SHPtoXarray() # if supply a country_str then only create .nc file for that country - country_lookup = dict(zip(self.country_code_mapping.values(), self.country_code_mapping.keys())) + country_lookup = dict( + zip(self.country_code_mapping.values(), self.country_code_mapping.keys()) + ) if country_str is not None: if country_str.capitalize() not in country_lookup.keys(): assert False, ( @@ -183,7 +185,9 @@ def _preprocess_single( f"** {(self.out_dir / filename).as_posix()} and lookup_dict saved! 
**" ) - def preprocess(self, reference_nc_filepath: Path, country_str: Optional[str] = None) -> None: + def preprocess( + self, reference_nc_filepath: Path, country_str: Optional[str] = None + ) -> None: """Preprocess FEWSNet Livelihood Zone shapefiles into xarray objects """ self._preprocess_single( @@ -191,5 +195,5 @@ def preprocess(self, reference_nc_filepath: Path, country_str: Optional[str] = N lookup_colname="LZCODE", reference_nc_filepath=reference_nc_filepath, var_name="livelihood_zone", - country_str=country_str + country_str=country_str, ) From da9bc62300b5ad847f94bb1beeded2ca5dfae25a Mon Sep 17 00:00:00 2001 From: tommylees112 Date: Thu, 6 Feb 2020 00:32:54 +0000 Subject: [PATCH 4/7] update the black / mypy errors --- scripts/preprocess.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/preprocess.py b/scripts/preprocess.py index 9becc306f..c681dc8f3 100644 --- a/scripts/preprocess.py +++ b/scripts/preprocess.py @@ -142,11 +142,15 @@ def preprocess_era5(): def preprocess_livelihood_zones(): data_path = get_data_path() + from pathlib import Path + data_path = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data/') + + regrid_path = data_path / "interim/chirps_preprocessed/data_kenya.nc" assert regrid_path.exists(), f"{regrid_path} not available" processor = FEWSNetLivelihoodPreprocessor(data_path) - processor.preprocess(reference_nc_filepath=regrid_path, country_str="kenya") + processor.preprocess(reference_nc_filepath=regrid_path, country_to_preprocess="kenya") if __name__ == "__main__": From eb1e2627b4605c010fdfbe68b00440b2d947a284 Mon Sep 17 00:00:00 2001 From: tommylees112 Date: Thu, 6 Feb 2020 00:33:55 +0000 Subject: [PATCH 5/7] update the black / mypy errors --- scripts/preprocess.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/preprocess.py b/scripts/preprocess.py index c681dc8f3..8256ce627 100644 --- a/scripts/preprocess.py +++ b/scripts/preprocess.py @@ -143,14 +143,16 @@ def preprocess_livelihood_zones(): data_path = get_data_path() from pathlib import Path - data_path = Path('/Volumes/Lees_Extend/data/ecmwf_sowc/data/') + data_path = Path("/Volumes/Lees_Extend/data/ecmwf_sowc/data/") regrid_path = data_path / "interim/chirps_preprocessed/data_kenya.nc" assert regrid_path.exists(), f"{regrid_path} not available" processor = FEWSNetLivelihoodPreprocessor(data_path) - processor.preprocess(reference_nc_filepath=regrid_path, country_to_preprocess="kenya") + processor.preprocess( + reference_nc_filepath=regrid_path, country_to_preprocess="kenya" + ) if __name__ == "__main__": From c1b627a60505230f7acc25cace5681533fdf2d2b Mon Sep 17 00:00:00 2001 From: tommylees112 Date: Thu, 6 Feb 2020 00:34:43 +0000 Subject: [PATCH 6/7] fix flake / mypy --- src/exporters/fewsnet_shapefiles.py | 2 +- src/preprocess/fewsnet_shapefiles.py | 23 +++++++++++------------ src/preprocess/utils.py | 14 +++++++------- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/exporters/fewsnet_shapefiles.py b/src/exporters/fewsnet_shapefiles.py index a1a74a344..d946cd4bf 100644 --- a/src/exporters/fewsnet_shapefiles.py +++ b/src/exporters/fewsnet_shapefiles.py @@ -40,7 +40,7 @@ def unzip(self, fname: Path) -> None: class FEWSNetKenyaLivelihoodExporter(FEWSNetExporter): data_str = "livelihood_zones" - url: List = "http://shapefiles.fews.net.s3.amazonaws.com/LHZ/FEWS_NET_LH_World.zip" + url: str = "http://shapefiles.fews.net.s3.amazonaws.com/LHZ/FEWS_NET_LH_World.zip" def export(self) -> None: """Export functionality for the FEWSNET 
Livelihood Zones as .shp files diff --git a/src/preprocess/fewsnet_shapefiles.py b/src/preprocess/fewsnet_shapefiles.py index 5bd903782..d5712ec1f 100644 --- a/src/preprocess/fewsnet_shapefiles.py +++ b/src/preprocess/fewsnet_shapefiles.py @@ -1,6 +1,5 @@ from pathlib import Path import xarray as xr -from collections import namedtuple from .base import BasePreProcessor from .utils import SHPtoXarray @@ -89,8 +88,8 @@ def _preprocess_single( var_name: str, lookup_colname: str, save: bool = True, - country_str: Optional[str] = None, - ) -> Optional[xr.Dataset]: + country_to_preprocess: Optional[str] = None, + ) -> None: """ Preprocess .shp admin boundary files into an `.nc` file with the same shape as reference_nc_filepath. @@ -114,7 +113,7 @@ def _preprocess_single( the column name to lookup in the shapefile (read in as geopandas.GeoDataFrame) - country_str: Optional[str] = None + country_to_preprocess: Optional[str] = None the country you want to preprocess """ @@ -128,21 +127,21 @@ def _preprocess_single( da = target_ds[data_var] # turn the shapefile into a categorical variable (like landcover) - gdf = gpd.read_file(shp_filepath) + gdf = gpd.read_file(shp_filepath) # type: ignore shp_to_nc = SHPtoXarray() - # if supply a country_str then only create .nc file for that country + # if supply a country_to_preprocess then only create .nc file for that country country_lookup = dict( zip(self.country_code_mapping.values(), self.country_code_mapping.keys()) ) - if country_str is not None: - if country_str.capitalize() not in country_lookup.keys(): + if country_to_preprocess is not None: + if country_to_preprocess.capitalize() not in country_lookup.keys(): assert False, ( f"Expecting to have one of: \n{country_lookup.keys()}" - f"\nYou supplied: {country_str.capitalize()}" + f"\nYou supplied: {country_to_preprocess.capitalize()}" "\nDoes this definitely exist?" ) - country_code_list = [country_lookup[country_str.capitalize()]] + country_code_list = [country_lookup[country_to_preprocess.capitalize()]] else: country_code_list = gdf.COUNTRY.unique() @@ -186,7 +185,7 @@ def _preprocess_single( ) def preprocess( - self, reference_nc_filepath: Path, country_str: Optional[str] = None + self, reference_nc_filepath: Path, country_to_preprocess: Optional[str] = None ) -> None: """Preprocess FEWSNet Livelihood Zone shapefiles into xarray objects """ @@ -195,5 +194,5 @@ def preprocess( lookup_colname="LZCODE", reference_nc_filepath=reference_nc_filepath, var_name="livelihood_zone", - country_str=country_str, + country_to_preprocess=country_to_preprocess, ) diff --git a/src/preprocess/utils.py b/src/preprocess/utils.py index c6fb9779b..cbb9be461 100644 --- a/src/preprocess/utils.py +++ b/src/preprocess/utils.py @@ -147,7 +147,7 @@ def rasterize( def _to_xarray( self, da: xr.DataArray, - gdf: GeoDataFrame, + gdf: GeoDataFrame, # type: ignore var_name: str = "region", lookup_colname: Optional[str] = None, ) -> xr.Dataset: @@ -161,21 +161,21 @@ def _to_xarray( # allow the user to see the column headers if lookup_colname is None: print("lookup_colname MUST be provided (see error message below)") - print(gdf.head()) + print(gdf.head()) # type: ignore assert ( - lookup_colname in gdf.columns - ), f"lookup_colname must be one of: {list(gdf.columns)}" + lookup_colname in gdf.columns # type: ignore + ), f"lookup_colname must be one of: {list(gdf.columns)}" # type: ignore # 2. create a list of tuples (shapely.geometry, id) # this allows for many different polygons within a .shp file # (e.g. 
Admin Regions of Kenya) - shapes = [(shape, n) for n, shape in enumerate(gdf.geometry)] + shapes = [(shape, n) for n, shape in enumerate(gdf.geometry)] # type: ignore # 3. create a new variable set to the id in `shapes` (same shape as da) ds = self.rasterize(shapes=shapes, coords=da.coords, variable_name=var_name) - values = [value for value in gdf[lookup_colname].tolist()] - keys = [str(key) for key in gdf.index.tolist()] + values = [value for value in gdf[lookup_colname].tolist()] # type: ignore + keys = [str(key) for key in gdf.index.tolist()] # type: ignore data_vals = ds[[d for d in ds.data_vars][0]].values unique_values = np.unique(data_vals[~np.isnan(data_vals)]) unique_values = [str(int(v)) for v in unique_values] From a09392e8fe7053af9b5f2b40bbf786a5173cad10 Mon Sep 17 00:00:00 2001 From: tommylees112 Date: Thu, 6 Feb 2020 11:08:07 +0000 Subject: [PATCH 7/7] fix flake error --- src/exporters/fewsnet_shapefiles.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/exporters/fewsnet_shapefiles.py b/src/exporters/fewsnet_shapefiles.py index d946cd4bf..657820ddc 100644 --- a/src/exporters/fewsnet_shapefiles.py +++ b/src/exporters/fewsnet_shapefiles.py @@ -1,6 +1,5 @@ from pathlib import Path import os -from typing import List from .base import BaseExporter
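
To try the changes in this series end to end, the sketch below simply strings together the two new entry points added here (it mirrors export_fewsnet_shapefiles in scripts/export.py and preprocess_livelihood_zones in scripts/preprocess.py). It is a minimal sketch, not part of any patch: it assumes the default data/ layout and that CHIRPS has already been preprocessed for Kenya so the reference grid file exists; the exact directory the outputs land in depends on the preprocessor's out_dir and is not spelled out here.

    from pathlib import Path

    from src.exporters import FEWSNetKenyaLivelihoodExporter
    from src.preprocess import FEWSNetLivelihoodPreprocessor

    data_path = Path("data")

    # download and unzip the global FEWS_NET_LH_World.zip shapefile into the
    # exporter's boundaries/livelihood_zones raw directory
    exporter = FEWSNetKenyaLivelihoodExporter(data_path)
    exporter.export()

    # rasterize the Kenya livelihood zones onto the CHIRPS reference grid,
    # producing livelihood_zone_kenya.nc plus kenya_lookup_dict.csv
    reference_grid = data_path / "interim/chirps_preprocessed/data_kenya.nc"
    processor = FEWSNetLivelihoodPreprocessor(data_path)
    processor.preprocess(
        reference_nc_filepath=reference_grid, country_to_preprocess="kenya"
    )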