From 2d28acb392807a40568c906f95cd6b0e580363cc Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 31 Oct 2024 16:46:40 +0100 Subject: [PATCH] Drop unused code in .ikarus; tests --- message_ix_models/model/transport/ikarus.py | 183 +++--------------- .../tests/model/transport/test_ikarus.py | 94 +-------- 2 files changed, 24 insertions(+), 253 deletions(-) diff --git a/message_ix_models/model/transport/ikarus.py b/message_ix_models/model/transport/ikarus.py index 8607bd587a..98c49ca986 100644 --- a/message_ix_models/model/transport/ikarus.py +++ b/message_ix_models/model/transport/ikarus.py @@ -1,27 +1,21 @@ """Prepare non-LDV data from the IKARUS model via :file:`GEAM_TRP_techinput.xlsx`.""" import logging -from collections import defaultdict from functools import lru_cache, partial from operator import le -from typing import TYPE_CHECKING, Dict +from typing import Dict import pandas as pd import xarray as xr from genno import Computer, Key, KeySeq, Quantity, quote from genno.core.key import single_key from iam_units import registry -from message_ix import make_df from openpyxl import load_workbook -from message_ix_models.model.structure import get_codes from message_ix_models.util import ( - ScenarioInfo, - broadcast, cached, convert_units, make_matched_dfs, - nodes_ex_world, package_data_path, same_node, same_time, @@ -29,10 +23,6 @@ ) from .non_ldv import UNITS -from .util import input_commodity_level - -if TYPE_CHECKING: - from .config import Config log = logging.getLogger(__name__) @@ -219,7 +209,27 @@ def read_ikarus_data(occupancy, k_output, k_inv_cost): def prepare_computer(c: Computer): - """Prepare `c` to perform model data preparation using IKARUS data.""" + """Prepare `c` to perform model data preparation using IKARUS data. + + ==================================================================================== + + The data is read from ``GEAM_TRP_techinput.xlsx``, and the processed data is + exported into ``non_LDV_techs_wrapped.csv``. 
+ + .. note:: superseded by the computations set up by :func:`prepare_computer`. + + Parameters + ---------- + context : .Context + + Returns + ------- + data : dict of (str -> pandas.DataFrame) + Keys are MESSAGE parameter names such as 'input', 'fix_cost'. + Values are data frames ready for :meth:`~.Scenario.add_par`. + Years in the data include the model horizon indicated by + :attr:`.Config.base_model_info`, plus the additional year 2010. + """ # TODO identify whether capacity_factor is needed c.configure(rename_dims={"source": "source"}) @@ -337,152 +347,3 @@ def prepare_computer(c: Computer): # .non_ldv.prepare_computer() only if IKARUS is the selected data source for non-LDV # data. Other derived quantities (emissions factors) are also prepared there based # on these outputs. - - -def get_ikarus_data(context) -> Dict[str, pd.DataFrame]: - """Prepare non-LDV data from :cite:`Martinsen2006`. - - The data is read from from ``GEAM_TRP_techinput.xlsx``, and the processed data is - exported into ``non_LDV_techs_wrapped.csv``. - - .. note:: superseded by the computations set up by :func:`prepare_computer`. - - Parameters - ---------- - context : .Context - - Returns - ------- - data : dict of (str -> pandas.DataFrame) - Keys are MESSAGE parameter names such as 'input', 'fix_cost'. - Values are data frames ready for :meth:`~.Scenario.add_par`. - Years in the data include the model horizon indicated by - :attr:`.Config.base_model_info`, plus the additional year 2010. - """ - # Reference to the transport configuration - config: "Config" = context.transport - tech_info = config.spec.add.set["technology"] - info = config.base_model_info - - # Merge with base model commodity information for io_units() below - # TODO this duplicates code in .ldv; move to a common location - all_info = ScenarioInfo() - all_info.set["commodity"].extend(get_codes("commodity")) - all_info.update(config.spec.add) - - # Retrieve the data from the spreadsheet. 
Use additional output efficiency and - # investment cost factors for some bus technologies - data = read_ikarus_data( - occupancy=config.non_ldv_output, # type: ignore [attr-defined] - k_output=config.efficiency["bus output"], - k_inv_cost=config.cost["bus inv"], - ) - - # Create data frames to add imported params to MESSAGEix - - # Vintage and active years from scenario info - # Prepend years between 2010 and *firstmodelyear* so that values are saved - missing_years = [x for x in info.set["year"] if (2010 <= x < info.y0)] - vtg_years = missing_years + info.yv_ya["year_vtg"].tolist() - act_years = missing_years + info.yv_ya["year_act"].tolist() - - # Default values to be used as args in make_df() - defaults = dict( - mode="all", - year_act=act_years, - year_vtg=vtg_years, - time="year", - time_origin="year", - time_dest="year", - ) - - # Dict of ('parameter name' -> [list of data frames]) - dfs = defaultdict(list) - - # Iterate over each parameter and technology - for (par, tec), group_data in data.groupby(["param", "technology"]): - # Dict including the default values to be used as args in make_df() - args = defaults.copy() - args["technology"] = tec - - # Parameter-specific arguments/processing - if par == "input": - pass # Handled by input_commodity_level(), below - elif par == "output": - # Get the mode for a technology - mode = tech_info[tech_info.index(tec)].parent.id - args.update(dict(commodity=f"transport pax {mode.lower()}", level="useful")) - - # Units, as an abbreviated string - _units = group_data.apply(lambda x: x.units).unique() - assert len(_units) == 1, "Units must be unique per (tec, par)" - units = _units[0] - args["unit"] = f"{units:~}" - - # Create data frame with values from *args* - df = make_df(par, **args) - - # Assign input commodity and level according to the technology - if par == "input": - df = input_commodity_level(context, df, default_level="final") - - # Copy data into the 'value' column, by vintage year - for (year, *_), value in 
group_data.items(): - df.loc[df["year_vtg"] == year, "value"] = value.magnitude - - # Drop duplicates. For parameters with 'year_vtg' but no 'year_act' dimension, - # the same year_vtg appears multiple times because of the contents of *defaults* - df.drop_duplicates(inplace=True) - - # Fill remaining values for the rest of vintage years with the last value - # registered, in this case for 2030. - df["value"] = df["value"].fillna(method="ffill") - - # Convert to the model's preferred input/output units for each commodity - if par in ("input", "output"): - target_units = df.apply( - lambda row: all_info.io_units( - row["technology"], row["commodity"], row["level"] - ), - axis=1, - ).unique() - assert 1 == len(target_units) - else: - target_units = [] - - if len(target_units): - # FIXME improve convert_units() to handle more of these steps - df["value"] = convert_units( - df["value"], {"value": (1.0, units, target_units[0])} - ) - df["unit"] = f"{target_units[0]:~}" - - # Round up technical_lifetime values due to incompatibility in handling - # non-integer values in the GAMS code - if par == "technical_lifetime": - df["value"] = df["value"].round() - - # Broadcast across all nodes - dfs[par].append( - df.pipe(broadcast, node_loc=nodes_ex_world(info.N)).pipe(same_node) - ) - - # Concatenate data frames for each model parameter - result = {par: pd.concat(list_of_df) for par, list_of_df in dfs.items()} - - # Capacity factors all 1.0 - result.update(make_matched_dfs(result["output"], capacity_factor=1.0)) - result["capacity_factor"]["unit"] = "" - - if context.get("debug", False): - # Directory for debug output (if any) - debug_dir = context.get_local_path("debug") - # Ensure the directory - debug_dir.mkdir(parents=True, exist_ok=True) - - for name, df in result.items(): - target = debug_dir.joinpath(f"ikarus-{name}.csv") - log.info(f"Dump data to {target}") - df.to_csv(target, index=False) - - return result diff --git 
a/message_ix_models/tests/model/transport/test_ikarus.py b/message_ix_models/tests/model/transport/test_ikarus.py index 3136af5e8b..0b10ff0362 100644 --- a/message_ix_models/tests/model/transport/test_ikarus.py +++ b/message_ix_models/tests/model/transport/test_ikarus.py @@ -1,105 +1,15 @@ -import pandas as pd import pytest from iam_units import registry from message_ix import make_df from numpy.testing import assert_allclose from pandas.testing import assert_series_equal -from message_ix_models.model.transport import build, ikarus, testing +from message_ix_models.model.transport import build, testing from message_ix_models.model.transport.non_ldv import UNITS from message_ix_models.model.transport.testing import assert_units from message_ix_models.project.navigate import T35_POLICY -@pytest.mark.skip(reason="Deprecated, slow") -@pytest.mark.parametrize("years", ["A", "B"]) -@pytest.mark.parametrize( - "regions, N_node", [("R11", 11), ("R12", 12), ("R14", 14), ("ISR", 1)] -) -def test_get_ikarus_data0(test_context, regions, N_node, years): - ctx = test_context - _, info = testing.configure_build(ctx, regions=regions, years=years) - - # get_ikarus_data() succeeds on the bare RES - data = ikarus.get_ikarus_data(ctx) - - # Returns a mapping - assert { - "capacity_factor", - "fix_cost", - "input", - "inv_cost", - "output", - "technical_lifetime", - } == set(data.keys()) - assert all(map(lambda df: isinstance(df, pd.DataFrame), data.values())) - - # Retrieve DataFrame for par e.g. 'inv_cost' and tech e.g. 'rail_pub' - inv = data["inv_cost"] - inv_rail_pub = inv[inv["technology"] == "rail_pub"] - - # NB: *prep_years* is created to accommodate prepended years before than - # *firstmodelyear*. See ikarus.py to check how/why those are prepended. 
- prep_years = (1 if years == "A" else 2) + len(info.Y) - # Regions * 13 years (inv_cost has 'year_vtg' but not 'year_act' dim) - rows_per_tech = N_node * prep_years - N_techs = 18 - - # Data have been loaded with the correct shape and magnitude: - assert inv_rail_pub.shape == (rows_per_tech, 5), inv_rail_pub - assert inv.shape == (rows_per_tech * N_techs, 5) - - # Magnitude for year e.g. 2020 - values = inv_rail_pub[inv_rail_pub["year_vtg"] == 2020]["value"] - value = values.iloc[0] - assert round(value, 3) == 3.233 - - # Units of each parameter have the correct dimensionality - dims = { - "capacity_factor": {}, # always dimensionless - "inv_cost": {"[currency]": 1, "[vehicle]": -1}, - "fix_cost": {"[currency]": 1, "[vehicle]": -1, "[time]": -1}, - "output": {"[passenger]": 1, "[vehicle]": -1}, - "technical_lifetime": {"[time]": 1}, - } - for par, dim in dims.items(): - assert_units(data[par], dim) - - # Specific magnitudes of other values to check - checks = [ - # commented (PNK 2022-06-17): corrected abuse of capacity_factor to include - # unrelated concepts - # dict(par="capacity_factor", year_vtg=2010, value=0.000905), - # dict(par="capacity_factor", year_vtg=2050, value=0.000886), - dict(par="technical_lifetime", year_vtg=2010, value=15.0), - dict(par="technical_lifetime", year_vtg=2050, value=15.0), - ] - defaults = dict(node_loc=info.N[-1], technology="ICG_bus", time="year") - - for check in checks: - # Create expected data - par_name = check.pop("par") - check["year_act"] = check["year_vtg"] - exp = make_df(par_name, **defaults, **check) - assert len(exp) == 1, "Single row for expected value" - - # Use merge() to find data with matching column values - columns = sorted(set(exp.columns) - {"value", "unit"}) - result = exp.merge(data[par_name], on=columns, how="inner") - - # Single row matches - assert len(result) == 1, result - - # Values match - assert_series_equal( - result["value_x"], - result["value_y"], - check_exact=False, - check_names=False, - 
atol=1e-4, - ) - - @build.get_computer.minimum_version @pytest.mark.parametrize("years", ["A", "B"]) @pytest.mark.parametrize( @@ -112,7 +22,7 @@ def test_get_ikarus_data0(test_context, regions, N_node, years): ], ) @pytest.mark.parametrize("options", [{}, dict(navigate_scenario=T35_POLICY.TEC)]) -def test_get_ikarus_data1(test_context, regions, N_node, years, options): +def test_get_ikarus_data(test_context, regions, N_node, years, options): """Test genno-based IKARUS data prep.""" ctx = test_context c, info = testing.configure_build(