Skip to content

Commit

Permalink
Merge branch 'main' into torch-config
Browse files Browse the repository at this point in the history
  • Loading branch information
danyoungday authored Mar 22, 2024
2 parents 7f5bdf9 + d5619d7 commit c3b5b07
Show file tree
Hide file tree
Showing 27 changed files with 439 additions and 137 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/eluc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# This lints the ELUC use case with PyLint and then runs its unit tests.

name: ELUC Use Case

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  build:
    runs-on: ubuntu-latest
    defaults:
      run:
        # Every `run` step below executes from the use case directory.
        working-directory: ./use_cases/eluc
    steps:
      # v4/v5 of the official actions replace the v3 releases, which target a deprecated Node runtime.
      - uses: actions/checkout@v4
      - name: Set up Python 3.10
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Set PYTHONPATH
        # Persist PYTHONPATH for the later steps; $PWD is the step's working directory.
        run: echo "PYTHONPATH=$PWD" >> "$GITHUB_ENV"
      - name: Test PYTHONPATH
        run: printenv PYTHONPATH
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pylint
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Lint with PyLint
        # Fails the job when the overall PyLint score drops below 9; the demo directory is not linted.
        run: pylint --ignore="demo" --recursive=y --fail-under=9 ./*
      - name: Run unit tests
        run: python -m unittest

10 changes: 10 additions & 0 deletions use_cases/eluc/.pylintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[MASTER]
ignore=demo

jobs=0

max-line-length=120

suggestion-mode=yes

good-names=X_train, X_val, X_test, y_train, y_val, y_test, X, Y, y, X_test_scaled
2 changes: 1 addition & 1 deletion use_cases/eluc/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ BLUE simulations with committed emissions could be used to estimate the long-ter
"Committed emissions" means all the emissions that are caused by a land-use change event are attributed to the year
of the event.
BLUE (bookkeeping of land use emissions) is a bookkeeping model that attributes carbon fluxes to land use activities.
See [BLUE: Bookkeeping of land use emissions](https://doi.org/10.1002/2014GB004997) for more details.
See [BLUE: Bookkeeping of land use emissions](https://doi.org/10.1002/2014GB004997) for more details.

### LUC

Expand Down
7 changes: 4 additions & 3 deletions use_cases/eluc/data/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
CODES_PATH = "data/codes.csv"

# Different variations of land-use change columns
LAND_USE_COLS = ['c3ann', 'c3nfx', 'c3per','c4ann', 'c4per',
'pastr', 'primf', 'primn',
LAND_USE_COLS = ['c3ann', 'c3nfx', 'c3per','c4ann', 'c4per',
'pastr', 'primf', 'primn',
'range', 'secdf', 'secdn', 'urban']
CROP_COLS = ['c3ann', 'c3nfx', 'c3per','c4ann', 'c4per']
LAND_USE_COLS = ["crop"] + [col for col in LAND_USE_COLS if col not in CROP_COLS]
Expand All @@ -29,7 +29,8 @@

# ["United Kingdom", "France", "Germany", "Netherlands", "Belgium", "Switzerland", "Ireland"]
EU_COUNTRIES = ["GB", "FR", "DE", "NL", "BE", "CH", "IE"]
# ["Brazil", "Bolivia", "Paraguay", "Peru", "Ecuador", "Colombia", "Venezuela", "Guyana", "Suriname", "Uruguay", "Argentina", "Chile"]
# ["Brazil", "Bolivia", "Paraguay", "Peru", "Ecuador", "Colombia",
# "Venezuela", "Guyana", "Suriname", "Uruguay", "Argentina", "Chile"]
SA_COUNTRIES = ["BR", "BO", "PY", "PE", "EC", "CO", "VE", "GY", "SR", "UY", "AR", "CL"]
# ["United States"]
US_COUNTRIES = ["US"]
Expand Down
4 changes: 2 additions & 2 deletions use_cases/eluc/data/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from data import constants

# TODO: Note: This table is not perfect and has some errors,
# we should consider manually fixing them. I tried my best but
# we should consider manually fixing them. I tried my best but
# I'm not 100% sure it's correct.
MANUAL_MAP = {
"INDO": 360,
Expand Down Expand Up @@ -57,7 +57,7 @@ def construct_countries_df():
# Replace all the bad codes with their real ones
for i in range(len(countries_df)):
old_abbrev = countries_df.iloc[i]["abbrevs"]
if old_abbrev in MANUAL_MAP.keys() and MANUAL_MAP[old_abbrev] in codes_df["Numeric code"].unique():
if old_abbrev in MANUAL_MAP and MANUAL_MAP[old_abbrev] in codes_df["Numeric code"].unique():
countries_df.iloc[i]["abbrevs"] = codes_df[codes_df["Numeric code"] == MANUAL_MAP[old_abbrev]]["Alpha-2 code"].iloc[0]

return countries_df
47 changes: 26 additions & 21 deletions use_cases/eluc/data/eluc_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def encode_as_df(self, df: pd.DataFrame) -> pd.DataFrame:
if min_val == max_val:
new_df[col] = 0
else:
new_df[col] = (new_df[col] - self.fields[col]["range"][0]) / (self.fields[col]["range"][1] - self.fields[col]["range"][0])
new_df[col] = (new_df[col] - min_val) / (max_val - min_val)
return new_df

def decode_as_df(self, df: pd.DataFrame) -> pd.DataFrame:
"""
Decodes a dataframe using the fields given in the constructor.
Expand All @@ -53,7 +53,9 @@ def decode_as_df(self, df: pd.DataFrame) -> pd.DataFrame:
new_df = df.copy()
for col in new_df.columns:
if col in self.fields:
new_df[col] = new_df[col] * (self.fields[col]["range"][1] - self.fields[col]["range"][0]) + self.fields[col]["range"][0]
min_val = self.fields[col]["range"][0]
max_val = self.fields[col]["range"][1]
new_df[col] = new_df[col] * (max_val - min_val) + min_val
return new_df


Expand Down Expand Up @@ -87,22 +89,23 @@ def get_encoded_train(self):
if self.encoded_train_df is None:
self.encoded_train_df = self.encoder.encode_as_df(self.train_df)
return self.encoded_train_df

def get_encoded_test(self):
    """
    Returns the encoded test dataframe.
    The test data is encoded with self.encoder the first time this is called
    (while self.encoded_test_df is still None) and the stored result is reused afterwards.
    """
    cached = self.encoded_test_df
    if cached is not None:
        return cached
    cached = self.encoder.encode_as_df(self.test_df)
    self.encoded_test_df = cached
    return cached

def get_fields(self) -> dict:
"""
Creates fields json object for the data encoder/prescriptor.
"""
fields_df = self.train_df[constants.CAO_MAPPING["context"] + constants.CAO_MAPPING["actions"] + ["ELUC"]].astype("float64")
fields = dict()
for col in constants.CAO_MAPPING["context"] + constants.CAO_MAPPING["actions"] + ["ELUC"]:
cao_cols = constants.CAO_MAPPING["context"] + constants.CAO_MAPPING["actions"] + ["ELUC"]
fields_df = self.train_df[cao_cols].astype("float64")
fields = {}
for col in cao_cols:
# Set range of land and diff land uses manually to their true ranges because they
# do not need to be scaled
if col in constants.LAND_USE_COLS:
Expand Down Expand Up @@ -132,28 +135,27 @@ def get_fields(self) -> dict:
"valued": "CONTINUOUS"
}

return fields
return fields

def push_to_hf(self, repo_path, commit_message, token=None):
    """
    Uploads the concatenated train and test data to a huggingface repo.
    Don't use this unless you're sure you want to update it!
    :param repo_path: Path to huggingface repo.
    :param commit_message: Commit message forwarded to push_to_hub.
    :param token: Token forwarded to push_to_hub. When falsy, the HF_TOKEN
        environment variable is used instead.
    """
    # The indices are available as columns anyways, so lat/lon/time can be dropped
    combined = pd.concat([self.train_df, self.test_df]).drop(["lat", "lon", "time"], axis=1)
    hf_token = token or os.getenv("HF_TOKEN")
    Dataset.from_pandas(combined).push_to_hub(repo_path, commit_message=commit_message, token=hf_token)


class ELUCData(AbstractData):
"""
Loads ELUC data from HuggingFace repo and processes it.
"""

def __init__(self, start_year=1851, test_year=2012, end_year=2022, countries=None):
"""
If update_path is given, load raw data the old way using 2 files that are merged.
Expand All @@ -169,12 +171,13 @@ def __init__(self, start_year=1851, test_year=2012, end_year=2022, countries=Non

self.train_df = df.loc[start_year:test_year-1]
self.test_df = df.loc[test_year:end_year-1]

self.encoder = ELUCEncoder(self.get_fields())

def hf_to_df(self, hf_repo):
"""
Loads dataset from huggingface, converts to pandas, then sets indices appropriately to time/lat/lon.
Loads dataset from huggingface, converts to pandas, then sets indices
appropriately to time/lat/lon.
Keep old time/lat/lon columns so we can use them as features later.
"""
ds = load_dataset(hf_repo)["train"]
Expand All @@ -194,7 +197,7 @@ def __init__(self, path, update_path, start_year=1851, test_year=2012, end_year=

self.train_df = df.loc[start_year:test_year-1]
self.test_df = df.loc[test_year:end_year-1]

self.encoder = ELUCEncoder(self.get_fields())

def import_data(self, path, update_path):
Expand All @@ -217,15 +220,17 @@ def import_data(self, path, update_path):
raw = raw.merge(eluc)

# Shift actions back a year
raw_diffs = ['c3ann', 'c3nfx', 'c3per','c4ann', 'c4per', 'pastr', 'primf', 'primn', 'range', 'secdf', 'secdn', 'urban']
raw_diffs = ['c3ann', 'c3nfx', 'c3per','c4ann', 'c4per',
'pastr', 'primf', 'primn', 'range',
'secdf', 'secdn', 'urban']
raw_diffs = [f"{col}_diff" for col in raw_diffs]
raw[raw_diffs] = raw[raw_diffs].shift(time=-1)

# Finds country for each cell using lat/lon coordinates
country_mask = regionmask.defined_regions.natural_earth_v5_0_0.countries_110.mask(raw)
raw["country"] = country_mask
return raw

def da_to_df(self, da: xr.DataArray, start_year=None, end_year=None, countries=None) -> pd.DataFrame:
"""
Converts an xarray DataArray to a pandas DataFrame.
Expand Down Expand Up @@ -259,10 +264,10 @@ def da_to_df(self, da: xr.DataArray, start_year=None, end_year=None, countries=N
# Merge crops into one column because BLUE model doesn't differentiate
df["crop"] = df[constants.CROP_COLS].sum(axis=1)
df["crop_diff"] = df[[f"{c}_diff" for c in constants.CROP_COLS]].sum(axis=1)

df['country_name'] = self.countries_df.loc[df['country'], 'names'].values

# Drop this column we used for preprocessing (?)
df = df.drop("mask", axis=1)

return df
4 changes: 2 additions & 2 deletions use_cases/eluc/data/torch_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class TorchDataset(Dataset):
:param y: labels
"""
def __init__(self, X: np.ndarray, y: np.ndarray, device="cpu"):
super().__init__()
super().__init__()
self.X = torch.tensor(X, dtype=torch.float32, device=device)
self.y = torch.tensor(y, device=device)
assert len(self.X) == len(self.y), "X and y must have the same length"
Expand All @@ -24,4 +24,4 @@ def __len__(self):
return len(self.X)

def __getitem__(self, idx: int) -> tuple:
return self.X[idx], self.y[idx]
return self.X[idx], self.y[idx]
Empty file.
Loading

0 comments on commit c3b5b07

Please sign in to comment.