Skip to content

Commit

Permalink
Fix some tests; some are still failing, blocked by a pyam regression
Browse files Browse the repository at this point in the history
  • Loading branch information
znicholls committed Oct 24, 2023
1 parent 30b8f99 commit 3d8bbf4
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 19 deletions.
1 change: 0 additions & 1 deletion .github/workflows/ci-cd-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ jobs:
- name: Formatting and linters
run: |
black --check src scripts tests setup.py --exclude doc/conf.py
black-nb --check notebooks
isort --check-only --quiet src scripts tests setup.py doc/conf.py
flake8 src scripts tests setup.py doc/conf.py
Expand Down
4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ install_requires =
joblib
matplotlib
numpy
openscm-units==0.5.0
openscm-runner==0.9.1
openscm-units
openscm-runner
pandas
pint>=0.13
pooch
Expand Down
21 changes: 13 additions & 8 deletions src/climate_assessment/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def count_variables_very_high(
# mark the scenarios that are not sufficiently infilled for climate assessment:
for v in vars:
for y in required_years:
df.require_variable(v, year=y, exclude_on_fail=True)
df.require_data(variable=v, year=y, exclude_on_fail=True)
# filter out the marked scenarios
df.filter(exclude=False, inplace=True)
numvars = len(df.filter(variable=str(prefix + "Emissions|*"), level=0).variable)
Expand Down Expand Up @@ -714,14 +714,21 @@ def require_allyears(

# check for baseyear
if output_csv:
dft_out = dft_out.append(dft[(dft[base_yr].isna()) & (dft[low_yr].isna())])
dft_out = pd.concat([
dft_out,
dft[(dft[base_yr].isna()) & (dft[low_yr].isna())],
])

dft = dft[~((dft[base_yr].isna()) & (dft[low_yr].isna()))]

# check for model years
# TODO: find better way than doing loop
for yr in required_years:
if output_csv:
dft_out = dft_out.append(dft[(dft[yr].isna())])
dft_out = pd.concat([
dft_out,
dft[(dft[yr].isna())],
])
dft = dft[~(dft[yr].isna())]

# write out if wanted
Expand Down Expand Up @@ -751,7 +758,7 @@ def require_allyears_and_drop_scenarios(
# mark the scenarios that are not sufficiently infilled for climate assessment:
for v in vars:
for y in required_years:
df_scen.require_variable(v, year=y, exclude_on_fail=True)
df_scen.require_data(variable=v, year=y, exclude_on_fail=True)
df_scen_out = df_scen.filter(exclude=True, inplace=False)
df_scen.filter(exclude=False, inplace=True)
if not df_scen.empty:
Expand Down Expand Up @@ -785,15 +792,15 @@ def reclassify_waste_and_other_co2_ar6(df):
"""
# filter out the scenarios that do not need changes
df_nochange = df.copy()
df_nochange.require_variable(
df_nochange.require_data(
variable=["Emissions|CO2|Other", "Emissions|CO2|Waste"], exclude_on_fail=True
)
df_nochange.filter(exclude=True, inplace=True)
df_nochange.reset_exclude()

# select the scenarios that do need changes
df_change = df.copy()
df_change.require_variable(
df_change.require_data(
variable=["Emissions|CO2|Other", "Emissions|CO2|Waste"], exclude_on_fail=True
)
df_change.filter(exclude=False, inplace=True)
Expand All @@ -816,7 +823,6 @@ def reclassify_waste_and_other_co2_ar6(df):
"Emissions|CO2|Energy and Industrial Processes|Incomplete",
]
df_change_notaffected_pd = df_change_pd[~df_change_pd.variable.isin(varsum)]
df_change_notaffected_pd = df_change_notaffected_pd.drop("exclude", axis=1)
df_change_notaffected_pyam = pyam.IamDataFrame(df_change_notaffected_pd)
df_change_pd = df_change_pd[df_change_pd.variable.isin(varsum)]
df_change_pd = df_change_pd.groupby(
Expand All @@ -826,7 +832,6 @@ def reclassify_waste_and_other_co2_ar6(df):
df_change_pd["variable"] = "Emissions|CO2|Energy and Industrial Processes"
df_change_pd["unit"] = "Mt CO2/yr"
df_change_pd["region"] = "World"
df_change_pd.drop("exclude", axis=1, inplace=True)
df_change_pyam = pyam.IamDataFrame(df_change_pd)
df_change = pyam.concat([df_change_pyam, df_change_notaffected_pyam])

Expand Down
3 changes: 2 additions & 1 deletion src/climate_assessment/climate/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,8 @@ def mangle_meta_table_climate_model(idf):
return out

meta_table = (
meta_table.groupby("climate_model")
meta_table
.groupby("climate_model", group_keys=False)
.apply(mangle_meta_table_climate_model)
.reset_index("climate_model", drop=True)
)
Expand Down
13 changes: 12 additions & 1 deletion src/climate_assessment/harmonization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,6 @@ def run_harmonization(df, instance, prefix):
filename="dropped_rows",
harmonization_year=harmonization_year,
) # note: this process is repeated after harmonization. Before is slightly nicer, but not enough.
scenarios = scenarios.filter(year=output_timesteps).timeseries()

# TODO: remove hard-coding
overrides = pd.DataFrame(
Expand Down Expand Up @@ -300,6 +299,7 @@ def run_harmonization(df, instance, prefix):
)
LOGGER.info("Harmonisation overrides:\n%s", overrides)

scenarios = scenarios.filter(year=output_timesteps).timeseries()
with parallel_progress_bar(tqdm.tqdm(desc="Harmonisation")):
LOGGER.info("Harmonising in parallel")
# TODO: remove hard-coding of n_jobs
Expand All @@ -313,6 +313,17 @@ def run_harmonization(df, instance, prefix):
for _, msdf in scenarios.groupby(["model", "scenario"])
)

LOGGER.info("Hacking around some regression in aneris - pyam stack")
def drop_broken_stuff(indf):
    """Drop the trailing half of the index levels from a copy of ``indf``.

    Workaround for the aneris/pyam regression noted in the log message
    above: the harmonised frames come back with their index levels
    duplicated, so the second half of the levels is spurious — TODO
    confirm once the upstream fix lands.
    """
    # Operate on a copy so the caller's frame is left untouched.
    cleaned = indf.copy()
    n_levels = len(cleaned.index.names)
    # Levels n//2 .. n-1 are the ones to discard.
    spurious_levels = list(range(n_levels // 2, n_levels))
    cleaned.index = cleaned.index.droplevel(spurious_levels)
    return cleaned

scenarios_harmonized = [drop_broken_stuff(s) for s in scenarios_harmonized]

LOGGER.info("Combining results")
scenarios_harmonized = pd.concat(scenarios_harmonized).reset_index()

Expand Down
6 changes: 5 additions & 1 deletion src/climate_assessment/infilling/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import os.path

import pandas as pd
import pandas.testing as pdt
import pyam
import scmdata
Expand Down Expand Up @@ -425,7 +426,10 @@ def _add_to_infilled(infilled, infilled_variables):
LOGGER.debug("No timeseries infilled")
return infilled

infilled = pyam.IamDataFrame(infilled.append(infilled_variables.loc[keep_idx]))
infilled = pyam.IamDataFrame(pd.concat([
infilled,
infilled_variables.loc[keep_idx]
]))

else:
infilled = infilled_variables
Expand Down
2 changes: 1 addition & 1 deletion src/climate_assessment/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def add_gwp100_kyoto(
total_co2_var = f"{prefix}Emissions|CO2"

tmp = df.copy()
tmp.require_variable(total_co2_var, exclude_on_fail=True)
tmp.require_data(variable=total_co2_var, exclude_on_fail=True)
calc_df = tmp.filter(exclude=False)

# aggregate CO2 before moving on
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/test_categorisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,21 +152,21 @@ def _convert_test_input_to_input_for_add_categorization(test_input):
copy["variable"] = [
f"{prefix}|Surface Temperature (GSAT)|{climate_model}{model_version}|{p:.1f}th Percentile"
]
df = df.append(copy)
df = pd.concat([df, copy])

for p in [66]:
copy = df.loc[df["variable"].str.contains("50.0"), :].copy()
copy["variable"] = [
f"{prefix}|Surface Temperature (GSAT)|{climate_model}{model_version}|{p:.1f}th Percentile"
]
df = df.append(copy)
df = pd.concat([df, copy])

for p in [75, 83, 90, 95]:
copy = df.loc[df["variable"].str.contains("67.0"), :].copy()
copy["variable"] = [
f"{prefix}|Surface Temperature (GSAT)|{climate_model}{model_version}|{p:.1f}th Percentile"
]
df = df.append(copy)
df = pd.concat([df, copy])

df["unit"] = "K"
df["scenario"] = name
Expand Down
4 changes: 3 additions & 1 deletion tests/integration/test_units.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,9 @@ def test_reclassify_co2_ar6():
processed_input_emissions_file = os.path.join(
TEST_DATA_DIR, "ex2_adjusted-waste-other.csv"
)

# import pdb
# pdb.set_trace()
# pyam.compare(reclassify_waste_and_other_co2_ar6(pyam.IamDataFrame(input_emissions_file)), pyam.IamDataFrame(processed_input_emissions_file), )
assert reclassify_waste_and_other_co2_ar6(
pyam.IamDataFrame(input_emissions_file)
).equals(pyam.IamDataFrame(processed_input_emissions_file))
Expand Down

0 comments on commit 3d8bbf4

Please sign in to comment.