Drop unused code in .ikarus; tests
khaeru committed Nov 5, 2024
1 parent 0e8dd0e commit 2d28acb
Showing 2 changed files with 24 additions and 253 deletions.
183 changes: 22 additions & 161 deletions message_ix_models/model/transport/ikarus.py
@@ -1,38 +1,28 @@
"""Prepare non-LDV data from the IKARUS model via :file:`GEAM_TRP_techinput.xlsx`."""

import logging
from collections import defaultdict
from functools import lru_cache, partial
from operator import le
from typing import TYPE_CHECKING, Dict
from typing import Dict

import pandas as pd
import xarray as xr
from genno import Computer, Key, KeySeq, Quantity, quote
from genno.core.key import single_key
from iam_units import registry
from message_ix import make_df
from openpyxl import load_workbook

from message_ix_models.model.structure import get_codes
from message_ix_models.util import (
ScenarioInfo,
broadcast,
cached,
convert_units,
make_matched_dfs,
nodes_ex_world,
package_data_path,
same_node,
same_time,
series_of_pint_quantity,
)

from .non_ldv import UNITS
from .util import input_commodity_level

if TYPE_CHECKING:
from .config import Config

log = logging.getLogger(__name__)

@@ -219,7 +209,27 @@ def read_ikarus_data(occupancy, k_output, k_inv_cost):


def prepare_computer(c: Computer):
"""Prepare `c` to perform model data preparation using IKARUS data."""
"""Prepare `c` to perform model data preparation using IKARUS data.
====================================================================================
The data is read from from ``GEAM_TRP_techinput.xlsx``, and the processed data is
exported into ``non_LDV_techs_wrapped.csv``.
.. note:: superseded by the computations set up by :func:`prepare_computer`.
Parameters
----------
context : .Context
Returns
-------
data : dict of (str -> pandas.DataFrame)
Keys are MESSAGE parameter names such as 'input', 'fix_cost'.
Values are data frames ready for :meth:`~.Scenario.add_par`.
Years in the data include the model horizon indicated by
:attr:`.Config.base_model_info`, plus the additional year 2010.
"""
# TODO identify whether capacity_factor is needed
c.configure(rename_dims={"source": "source"})

@@ -337,152 +347,3 @@ def prepare_computer(c: Computer):
# .non_ldv.prepare_computer() only if IKARUS is the selected data source for non-LDV
# data. Other derived quantities (emissions factors) are also prepared there based
# on these outputs.
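
For orientation, a minimal sketch of how this genno-based preparation is driven; the output key name below is hypothetical, since the actual keys are connected in `.non_ldv.prepare_computer()`:

```python
from genno import Computer

from message_ix_models.model.transport import ikarus

c = Computer()
# Configuration and upstream input keys would be added here first.
ikarus.prepare_computer(c)  # add the IKARUS tasks to the graph

# Computation happens lazily, only when a key is requested, e.g.:
# result = c.get("transport nonldv::ixmp")  # hypothetical key name
```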


def get_ikarus_data(context) -> Dict[str, pd.DataFrame]:
"""Prepare non-LDV data from :cite:`Martinsen2006`.
The data is read from from ``GEAM_TRP_techinput.xlsx``, and the processed data is
exported into ``non_LDV_techs_wrapped.csv``.
.. note:: superseded by the computations set up by :func:`prepare_computer`.
Parameters
----------
context : .Context
Returns
-------
data : dict of (str -> pandas.DataFrame)
Keys are MESSAGE parameter names such as 'input', 'fix_cost'.
Values are data frames ready for :meth:`~.Scenario.add_par`.
Years in the data include the model horizon indicated by
:attr:`.Config.base_model_info`, plus the additional year 2010.
"""
# Reference to the transport configuration
config: "Config" = context.transport
tech_info = config.spec.add.set["technology"]
info = config.base_model_info

# Merge with base model commodity information for io_units() below
# TODO this duplicates code in .ldv; move to a common location
all_info = ScenarioInfo()
all_info.set["commodity"].extend(get_codes("commodity"))
all_info.update(config.spec.add)

# Retrieve the data from the spreadsheet. Use additional output efficiency and
# investment cost factors for some bus technologies
data = read_ikarus_data(
occupancy=config.non_ldv_output, # type: ignore [attr-defined]
k_output=config.efficiency["bus output"],
k_inv_cost=config.cost["bus inv"],
)

# Create data frames to add imported params to MESSAGEix

# Vintage and active years from scenario info
# Prepend years between 2010 and *firstmodelyear* so that values are saved
missing_years = [x for x in info.set["year"] if (2010 <= x < info.y0)]
vtg_years = missing_years + info.yv_ya["year_vtg"].tolist()
act_years = missing_years + info.yv_ya["year_act"].tolist()

# Default values to be used as args in make_df()
defaults = dict(
mode="all",
year_act=act_years,
year_vtg=vtg_years,
time="year",
time_origin="year",
time_dest="year",
)
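
As a reminder of the idiom (values invented for illustration): `make_df()` from message_ix returns a frame with every column of the named parameter, broadcasting scalar keyword arguments against list-valued ones:

```python
from message_ix import make_df

# Columns node_loc, technology, year_vtg, value, unit; the scalar values
# are repeated across the three vintage years. node_loc stays empty until
# filled, e.g. via broadcast() further below.
df = make_df(
    "technical_lifetime",
    technology="ICG_bus",
    year_vtg=[2010, 2020, 2030],
    value=15.0,
    unit="year",
)
```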

# Dict of ('parameter name' -> [list of data frames])
dfs = defaultdict(list)

# Iterate over each parameter and technology
for (par, tec), group_data in data.groupby(["param", "technology"]):
# Dict including the default values to be used as args in make_df()
args = defaults.copy()
args["technology"] = tec

# Parameter-specific arguments/processing
if par == "input":
pass # Handled by input_commodity_level(), below
elif par == "output":
# Get the mode for a technology
mode = tech_info[tech_info.index(tec)].parent.id
args.update(dict(commodity=f"transport pax {mode.lower()}", level="useful"))

# Units, as an abbreviated string
_units = group_data.apply(lambda x: x.units).unique()
assert len(_units) == 1, "Units must be unique per (tec, par)"
units = _units[0]
args["unit"] = f"{units:~}"

# Create data frame with values from *args*
df = make_df(par, **args)

# Assign input commodity and level according to the technology
if par == "input":
df = input_commodity_level(context, df, default_level="final")

# Copy data into the 'value' column, by vintage year
for (year, *_), value in group_data.items():
df.loc[df["year_vtg"] == year, "value"] = value.magnitude

# Drop duplicates. For parameters with 'year_vtg' but no 'year_act' dimension,
# the same year_vtg appears multiple times because of the contents of *defaults*
df.drop_duplicates(inplace=True)

# Fill remaining values for the rest of vintage years with the last value
# registered, in this case for 2030.
df["value"] = df["value"].fillna(method="ffill")

# Convert to the model's preferred input/output units for each commodity
if par in ("input", "output"):
target_units = df.apply(
lambda row: all_info.io_units(
row["technology"], row["commodity"], row["level"]
),
axis=1,
).unique()
assert 1 == len(target_units)
else:
target_units = []

if len(target_units):
# FIXME improve convert_units() to handle more of these steps
df["value"] = convert_units(
df["value"], {"value": (1.0, units, target_units[0])}
)
df["unit"] = f"{target_units[0]:~}"

# Round up technical_lifetime values due to incompatibility in handling
# non-integer values in the GAMS code
if par == "technical_lifetime":
df["value"] = df["value"].round()

# Broadcast across all nodes
dfs[par].append(
df.pipe(broadcast, node_loc=nodes_ex_world(info.N)).pipe(same_node)
)

# Concatenate data frames for each model parameter
result = {par: pd.concat(list_of_df) for par, list_of_df in dfs.items()}

# Capacity factors all 1.0
result.update(make_matched_dfs(result["output"], capacity_factor=1.0))
result["capacity_factor"]["unit"] = ""

if context.get("debug", False):
# Directory for debug output (if any)
debug_dir = context.get_local_path("debug")
# Ensure the directory
debug_dir.mkdir(parents=True, exist_ok=True)

for name, df in result.items():
target = debug_dir.joinpath(f"ikarus-{name}.csv")
log.info(f"Dump data to {target}")
df.to_csv(target, index=False)

return result
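
Before its removal, the returned mapping would be added to a scenario along these lines (a sketch; the `context` and `scenario` objects are assumed to exist):

```python
data = get_ikarus_data(context)

# Scenario.transact() wraps check_out()/commit() around the additions
with scenario.transact("Add IKARUS non-LDV data"):
    for par_name, df in data.items():
        scenario.add_par(par_name, df)
```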
94 changes: 2 additions & 92 deletions message_ix_models/tests/model/transport/test_ikarus.py
@@ -1,105 +1,15 @@
import pandas as pd
import pytest
from iam_units import registry
from message_ix import make_df
from numpy.testing import assert_allclose
from pandas.testing import assert_series_equal

from message_ix_models.model.transport import build, ikarus, testing
from message_ix_models.model.transport import build, testing
from message_ix_models.model.transport.non_ldv import UNITS
from message_ix_models.model.transport.testing import assert_units
from message_ix_models.project.navigate import T35_POLICY


@pytest.mark.skip(reason="Deprecated, slow")
@pytest.mark.parametrize("years", ["A", "B"])
@pytest.mark.parametrize(
"regions, N_node", [("R11", 11), ("R12", 12), ("R14", 14), ("ISR", 1)]
)
def test_get_ikarus_data0(test_context, regions, N_node, years):
ctx = test_context
_, info = testing.configure_build(ctx, regions=regions, years=years)

# get_ikarus_data() succeeds on the bare RES
data = ikarus.get_ikarus_data(ctx)

# Returns a mapping
assert {
"capacity_factor",
"fix_cost",
"input",
"inv_cost",
"output",
"technical_lifetime",
} == set(data.keys())
assert all(map(lambda df: isinstance(df, pd.DataFrame), data.values()))

# Retrieve DataFrame for par e.g. 'inv_cost' and tech e.g. 'rail_pub'
inv = data["inv_cost"]
inv_rail_pub = inv[inv["technology"] == "rail_pub"]

# NB: *prep_years* accommodates the years prepended before *firstmodelyear*.
# See ikarus.py for how/why those years are prepended.
prep_years = (1 if years == "A" else 2) + len(info.Y)
# Regions × *prep_years* (inv_cost has 'year_vtg' but not 'year_act' dim)
rows_per_tech = N_node * prep_years
N_techs = 18

# Data have been loaded with the correct shape and magnitude:
assert inv_rail_pub.shape == (rows_per_tech, 5), inv_rail_pub
assert inv.shape == (rows_per_tech * N_techs, 5)

# Magnitude for year e.g. 2020
values = inv_rail_pub[inv_rail_pub["year_vtg"] == 2020]["value"]
value = values.iloc[0]
assert round(value, 3) == 3.233

# Units of each parameter have the correct dimensionality
dims = {
"capacity_factor": {}, # always dimensionless
"inv_cost": {"[currency]": 1, "[vehicle]": -1},
"fix_cost": {"[currency]": 1, "[vehicle]": -1, "[time]": -1},
"output": {"[passenger]": 1, "[vehicle]": -1},
"technical_lifetime": {"[time]": 1},
}
for par, dim in dims.items():
assert_units(data[par], dim)
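
`assert_units()` compares pint dimensionality rather than literal unit strings, so e.g. both 'USD/vehicle' and 'EUR/vehicle' would satisfy the `inv_cost` entry. A sketch of the underlying check, assuming the `iam_units` registry defines the transport units the table above implies:

```python
from iam_units import registry

q = registry("passenger / vehicle")
assert dict(q.dimensionality) == {"[passenger]": 1, "[vehicle]": -1}
```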

# Specific magnitudes of other values to check
checks = [
# commented (PNK 2022-06-17): corrected abuse of capacity_factor to include
# unrelated concepts
# dict(par="capacity_factor", year_vtg=2010, value=0.000905),
# dict(par="capacity_factor", year_vtg=2050, value=0.000886),
dict(par="technical_lifetime", year_vtg=2010, value=15.0),
dict(par="technical_lifetime", year_vtg=2050, value=15.0),
]
defaults = dict(node_loc=info.N[-1], technology="ICG_bus", time="year")

for check in checks:
# Create expected data
par_name = check.pop("par")
check["year_act"] = check["year_vtg"]
exp = make_df(par_name, **defaults, **check)
assert len(exp) == 1, "Single row for expected value"

# Use merge() to find data with matching column values
columns = sorted(set(exp.columns) - {"value", "unit"})
result = exp.merge(data[par_name], on=columns, how="inner")

# Single row matches
assert len(result) == 1, result

# Values match
assert_series_equal(
result["value_x"],
result["value_y"],
check_exact=False,
check_names=False,
atol=1e-4,
)


@build.get_computer.minimum_version
@pytest.mark.parametrize("years", ["A", "B"])
@pytest.mark.parametrize(
@@ -112,7 +22,7 @@ def test_get_ikarus_data0(test_context, regions, N_node, years):
],
)
@pytest.mark.parametrize("options", [{}, dict(navigate_scenario=T35_POLICY.TEC)])
def test_get_ikarus_data1(test_context, regions, N_node, years, options):
def test_get_ikarus_data(test_context, regions, N_node, years, options):
"""Test genno-based IKARUS data prep."""
ctx = test_context
c, info = testing.configure_build(
