Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use fewsnet livelihood zones #148

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion scripts/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
ERA5LandExporter,
)

from src.exporters import FEWSNetKenyaLivelihoodExporter

from scripts.utils import get_data_path


Expand Down Expand Up @@ -159,8 +161,14 @@ def export_kenya_boundaries():
exporter.export()


def export_fewsnet_shapefiles():
    """Run the FEWS NET livelihood-zone exporter against the project data folder."""
    FEWSNetKenyaLivelihoodExporter(get_data_path()).export()


if __name__ == "__main__":
export_era5_land()
# export_era5_land()
# export_era5()
# export_vhi()
# export_chirps()
Expand All @@ -169,3 +177,4 @@ def export_kenya_boundaries():
# export_esa()
# export_s5()
# export_kenya_boundaries()
export_fewsnet_shapefiles()
41 changes: 30 additions & 11 deletions scripts/preprocess.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys

sys.path.append("..")

from src.preprocess import (
VHIPreprocessor,
CHIRPSPreprocessor,
Expand All @@ -11,6 +12,7 @@
SRTMPreprocessor,
ERA5MonthlyMeanPreprocessor,
KenyaASALMask,
FEWSNetLivelihoodPreprocessor,
)

from src.preprocess.admin_boundaries import KenyaAdminPreprocessor
Expand Down Expand Up @@ -137,15 +139,32 @@ def preprocess_era5():
processor.preprocess(subset_str="kenya", regrid=regrid_path)


def preprocess_livelihood_zones():
    """Preprocess the FEWS NET livelihood zones for Kenya onto the CHIRPS grid.

    The data folder comes from ``get_data_path()`` so the script works on any
    machine (no developer-specific absolute path). The preprocessed CHIRPS
    Kenya file is used as the reference grid and must already exist.

    Raises
    ------
    AssertionError
        If the CHIRPS reference file has not been produced yet.
    """
    # NOTE(review): assumes get_data_path() returns a pathlib.Path - confirm
    data_path = get_data_path()

    regrid_path = data_path / "interim/chirps_preprocessed/data_kenya.nc"
    assert regrid_path.exists(), f"{regrid_path} not available"

    processor = FEWSNetLivelihoodPreprocessor(data_path)
    processor.preprocess(
        reference_nc_filepath=regrid_path, country_to_preprocess="kenya"
    )


if __name__ == "__main__":
process_vci_2018()
process_precip_2018()
process_era5POS_2018()
process_gleam()
process_esa_cci_landcover()
preprocess_srtm()
preprocess_era5()
preprocess_kenya_boundaries(selection="level_1")
preprocess_kenya_boundaries(selection="level_2")
preprocess_kenya_boundaries(selection="level_3")
preprocess_asal_mask()
# process_vci_2018()
# process_precip_2018()
# process_era5POS_2018()
# process_gleam()
# process_esa_cci_landcover()
# preprocess_srtm()
# preprocess_era5()
# preprocess_kenya_boundaries(selection="level_1")
# preprocess_kenya_boundaries(selection="level_2")
# preprocess_kenya_boundaries(selection="level_3")
# preprocess_asal_mask()
preprocess_livelihood_zones()
2 changes: 2 additions & 0 deletions src/exporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .srtm import SRTMExporter
from .esa_cci import ESACCIExporter
from .admin_boundaries import KenyaAdminExporter
from .fewsnet_shapefiles import FEWSNetKenyaLivelihoodExporter

__all__ = [
"ERA5Exporter",
Expand All @@ -20,4 +21,5 @@
"ESACCIExporter",
"ERA5LandExporter",
"KenyaAdminExporter",
"FEWSNetKenyaLivelihoodExporter",
]
55 changes: 55 additions & 0 deletions src/exporters/fewsnet_shapefiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import os
import subprocess
from pathlib import Path

from .base import BaseExporter


class FEWSNetExporter(BaseExporter):
    """Export FEWSNet data

    https://fews.net/

    Base class for FEWS NET downloads. Subclasses set ``data_str``; files are
    written to ``<raw_folder>/boundaries/<data_str>``.

    TODO: need to use Selenium to navigate this page?
    https://fews.net/data
    """

    # name of the sub-directory (under raw/boundaries) that downloads go to
    data_str: str

    def __init__(self, data_folder: Path = Path("data")) -> None:
        super().__init__(data_folder)
        # write the downloads to <raw_folder>/boundaries/<data_str>
        # NOTE(review): raw_folder is presumably set by BaseExporter - confirm
        self.output_dir = self.raw_folder / "boundaries" / self.data_str
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def wget_file(self, url_path: str) -> None:
        """Download ``url_path`` into ``self.output_dir`` using ``wget``.

        Nothing is downloaded if a file with the same name already exists.

        Raises
        ------
        subprocess.CalledProcessError
            If ``wget`` exits with a non-zero status.
        """
        output_file = self.output_dir / url_path.split("/")[-1]
        if output_file.exists():
            print(f"{output_file} already exists! Skipping")
            return None

        # argument list (no shell) so paths with spaces / metacharacters are safe
        subprocess.run(
            ["wget", url_path, "-P", self.output_dir.as_posix()], check=True
        )

    def unzip(self, fname: Path) -> None:
        """Unzip the archive ``fname`` into ``self.output_dir`` using ``unzip``.

        Raises
        ------
        subprocess.CalledProcessError
            If ``unzip`` exits with a non-zero status.
        """
        print(f"Unzipping {fname.name}")

        # check=True so a failed extraction is not reported as a success below
        subprocess.run(
            ["unzip", fname.as_posix(), "-d", self.output_dir.resolve().as_posix()],
            check=True,
        )
        print(f"{fname.name} unzipped!")


class FEWSNetKenyaLivelihoodExporter(FEWSNetExporter):
    """Exporter for the FEWS NET livelihood-zone shapefile archive."""

    data_str = "livelihood_zones"
    url: str = "http://shapefiles.fews.net.s3.amazonaws.com/LHZ/FEWS_NET_LH_World.zip"

    def export(self) -> None:
        """Download and unzip the livelihood-zone archive unless the zip is present.
        """
        archive = self.output_dir / self.url.split("/")[-1]

        # an existing zip archive is treated as "already exported"
        if archive.exists():
            print("Data already downloaded!")
            return

        self.wget_file(url_path=self.url)
        self.unzip(fname=archive)
2 changes: 2 additions & 0 deletions src/preprocess/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .esa_cci import ESACCIPreprocessor
from .srtm import SRTMPreprocessor
from .admin_boundaries import KenyaAdminPreprocessor, KenyaASALMask
from .fewsnet_shapefiles import FEWSNetLivelihoodPreprocessor

__all__ = [
"VHIPreprocessor",
Expand All @@ -19,4 +20,5 @@
"SRTMPreprocessor",
"KenyaAdminPreprocessor",
"KenyaASALMask",
"FEWSNetLivelihoodPreprocessor",
]
198 changes: 198 additions & 0 deletions src/preprocess/fewsnet_shapefiles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
from pathlib import Path
import xarray as xr
from .base import BasePreProcessor
from .utils import SHPtoXarray

from typing import Optional, Dict

gpd = None
GeoDataFrame = None


class FEWSNetPreprocesser(BasePreProcessor):
    """ Preprocesses the FEWSNetwork shapefile data

    Base class holding the country-code lookup and the lazy geopandas import
    shared by the FEWS NET preprocessors.
    """

    # two-letter country code (as matched against the shapefile's COUNTRY
    # column) -> human readable country name
    country_code_mapping: Dict[str, str] = {
        "AF": "Afghanistan",
        "AO": "Angola",
        "BF": "Burkina Faso",
        "BI": "Burundi",
        "CF": "CAR",
        "DJ": "Djibouti",
        "TZ": "Tanzania",
        "ZW": "Zimbabwe",
        "ZM": "Zambia",
        "YE": "Yemen",
        "UG": "Uganda",
        "TD": "Chad",
        "SV": "El Salvador",
        "SN": "Senegal",
        "SO": "Somalia",
        "SL": "Sierra Leone",
        "SD": "Sudan",
        "NI": "Nicaragua",
        "NG": "Nigeria",
        "ET": "Ethiopia",
        "NE": "Niger",
        "GN": "Guinea",
        "MZ": "Mozambique",
        "HN": "Honduras",
        "MW": "Malawi",
        "ML": "Mali",
        "MR": "Mauritania",
        "HT": "Haiti",
        "MG": "Madagascar",
        "LR": "Liberia",
        "KE": "Kenya",
        "LS": "Lesotho",
        "CD": "DR Congo",
        "SS": "South Sudan",
        "RW": "Rwanda",
        "TJ": "Tajikistan",
        "GT": "Guatemala",
    }

    # name of the raw-data sub-directory; set by subclasses
    dataset: str
    analysis = True

    def __init__(self, data_folder: Path = Path("data")):
        super().__init__(data_folder)

        # geopandas is imported lazily (and cached in module globals) so the
        # module can be imported without geopandas installed
        print("The FEWSNet preprocessor requires the geopandas package")
        global gpd
        if gpd is None:
            import geopandas as gpd
        global GeoDataFrame
        if GeoDataFrame is None:
            from geopandas.geodataframe import GeoDataFrame

    def get_filename(self, var_name: str, country: str) -> str:  # type: ignore
        """Return the output filename ``{var_name}_{country}.nc``."""
        new_filename = f"{var_name}_{country}.nc"
        return new_filename


class FEWSNetLivelihoodPreprocessor(FEWSNetPreprocesser):
    """Convert the FEWS NET livelihood-zone shapefile into per-country `.nc` files."""

    dataset = "livelihood_zones"

    def __init__(self, data_folder: Path = Path("data")) -> None:
        super().__init__(data_folder)
        # raw shapefiles are read from <raw_folder>/boundaries/livelihood_zones
        self.base_raw_dir = self.raw_folder / "boundaries" / self.dataset

    def _get_country_code(self, country: str) -> str:
        """Return the country code for a country name.

        Matching ignores case and surrounding/repeated whitespace, and accepts
        underscores in place of spaces, so "kenya", "Burkina Faso",
        "burkina_faso" and "car" all resolve.

        Raises
        ------
        AssertionError
            If the name is not one of the values of ``country_code_mapping``
            (kept as AssertionError for backward compatibility, but raised
            explicitly so it is not stripped under ``python -O``).
        """
        name_to_code = {
            name.casefold(): code for code, name in self.country_code_mapping.items()
        }
        key = " ".join(country.replace("_", " ").split()).casefold()
        if key not in name_to_code:
            raise AssertionError(
                "Expecting to have one of: "
                f"\n{sorted(self.country_code_mapping.values())}"
                f"\nYou supplied: {country}"
                "\nDoes this definitely exist?"
            )
        return name_to_code[key]

    def _preprocess_single(
        self,
        shp_filepath: Path,
        reference_nc_filepath: Path,
        var_name: str,
        lookup_colname: str,
        save: bool = True,
        country_to_preprocess: Optional[str] = None,
    ) -> None:
        """ Preprocess the livelihood zone `.shp` file into one `.nc`
        file per country with the same shape as reference_nc_filepath.

        Also writes a `{country}_lookup_dict.csv` with the key info
        columns of the shapefile next to each `.nc` file. Countries whose
        output file already exists are skipped.

        Arguments
        ----------
        shp_filepath: Path
            The path to the shapefile

        reference_nc_filepath: Path
            The path to the netcdf file with the shape
            (must have been run through Preprocessors prior to using)

        var_name: str
            the name of the Variable in the xr.Dataset and the name
            of the output filename - {var_name}_{country}.nc

        lookup_colname: str
            the column name to lookup in the shapefile
            (read in as geopandas.GeoDataFrame)

        save: bool = True
            currently unused - the outputs are always written

        country_to_preprocess: Optional[str] = None
            the name of the country you want to preprocess (case-insensitive).
            If None, every country code found in the shapefile is processed.

        """
        assert "interim" in reference_nc_filepath.parts, (
            "Expected " "the target data to have been preprocessed by the pipeline"
        )

        # MUST have a target dataset to create the same shape
        target_ds = xr.ones_like(xr.open_dataset(reference_nc_filepath))
        # use the first data variable as the template array
        data_var = list(target_ds.data_vars)[0]
        da = target_ds[data_var]

        # turn the shapefile into a categorical variable (like landcover)
        gdf = gpd.read_file(shp_filepath)  # type: ignore
        shp_to_nc = SHPtoXarray()

        # if supply a country_to_preprocess then only create .nc file for that country
        if country_to_preprocess is not None:
            country_code_list = [self._get_country_code(country_to_preprocess)]
        else:
            # ignore features with a missing country code
            country_code_list = gdf.COUNTRY.dropna().unique()

        for country_code in country_code_list:
            gdf_country = gdf.loc[gdf.COUNTRY == country_code]

            # create a unique filename for each country; fall back to the raw
            # code when the shapefile has a country we have no name for
            country_name = self.country_code_mapping.get(
                country_code, str(country_code)
            )
            country_str = country_name.lower().replace(" ", "_")
            filename = self.get_filename(var_name, country_str)
            out_file = self.out_dir / filename
            if out_file.exists():
                print(
                    "** Data already preprocessed! **\nIf you need to "
                    "process again then move or delete existing file"
                    f" at: {out_file.as_posix()}"
                )
                continue

            # NOTE(review): presumably rasterises the polygons onto the grid
            # of `da` - confirm against SHPtoXarray._to_xarray
            ds = shp_to_nc._to_xarray(
                da=da, gdf=gdf_country, var_name=var_name, lookup_colname=lookup_colname
            )

            # save the data
            print(f"Saving to {self.out_dir}")

            if self.analysis is True:
                assert self.out_dir.parts[-2] == "analysis", (
                    "self.analysis should "
                    "be True and the output directory should be analysis"
                )

            ds.to_netcdf(out_file)
            # save key info columns
            gdf_country[
                ["OBJECTID", "FNID", "LZNUM", "LZCODE", "LZNAMEEN", "CLASS"]
            ].to_csv(self.out_dir / f"{country_str}_lookup_dict.csv")

            print(f"** {out_file.as_posix()} and lookup_dict saved! **")

    def preprocess(
        self, reference_nc_filepath: Path, country_to_preprocess: Optional[str] = None
    ) -> None:
        """Preprocess FEWSNet Livelihood Zone shapefiles into xarray objects

        Arguments
        ----------
        reference_nc_filepath: Path
            preprocessed netcdf file whose grid the output should match

        country_to_preprocess: Optional[str] = None
            name of a single country to process; all countries if None
        """
        self._preprocess_single(
            shp_filepath=self.base_raw_dir / "FEWS_NET_LH_World.shp",
            lookup_colname="LZCODE",
            reference_nc_filepath=reference_nc_filepath,
            var_name="livelihood_zone",
            country_to_preprocess=country_to_preprocess,
        )
Loading