Drop unused code in .ikarus; tests
khaeru committed Nov 5, 2024
1 parent 0e8dd0e commit 2d28acb
Showing 2 changed files with 24 additions and 253 deletions.
183 changes: 22 additions & 161 deletions message_ix_models/model/transport/ikarus.py
@@ -1,38 +1,28 @@
"""Prepare non-LDV data from the IKARUS model via :file:`GEAM_TRP_techinput.xlsx`."""

import logging
from collections import defaultdict
from functools import lru_cache, partial
from operator import le
from typing import TYPE_CHECKING, Dict
from typing import Dict

import pandas as pd
import xarray as xr
from genno import Computer, Key, KeySeq, Quantity, quote
from genno.core.key import single_key
from iam_units import registry
from message_ix import make_df
from openpyxl import load_workbook

from message_ix_models.model.structure import get_codes
from message_ix_models.util import (
ScenarioInfo,
broadcast,
cached,
convert_units,
make_matched_dfs,
nodes_ex_world,
package_data_path,
same_node,
same_time,
series_of_pint_quantity,
)

from .non_ldv import UNITS
from .util import input_commodity_level

if TYPE_CHECKING:
from .config import Config

log = logging.getLogger(__name__)

@@ -219,7 +209,27 @@ def read_ikarus_data(occupancy, k_output, k_inv_cost):


def prepare_computer(c: Computer):
"""Prepare `c` to perform model data preparation using IKARUS data."""
"""Prepare `c` to perform model data preparation using IKARUS data.
====================================================================================
The data is read from from ``GEAM_TRP_techinput.xlsx``, and the processed data is
exported into ``non_LDV_techs_wrapped.csv``.
.. note:: superseded by the computations set up by :func:`prepare_computer`.
Parameters
----------
context : .Context
Returns
-------
data : dict of (str -> pandas.DataFrame)
Keys are MESSAGE parameter names such as 'input', 'fix_cost'.
Values are data frames ready for :meth:`~.Scenario.add_par`.
Years in the data include the model horizon indicated by
:attr:`.Config.base_model_info`, plus the additional year 2010.
"""
# TODO identify whether capacity_factor is needed
c.configure(rename_dims={"source": "source"})

@@ -337,152 +347,3 @@ def prepare_computer(c: Computer):
# .non_ldv.prepare_computer() only if IKARUS is the selected data source for non-LDV
# data. Other derived quantities (emissions factors) are also prepared there based
# on these outputs.
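
For orientation, a minimal sketch of how this genno-based preparation is driven; the output key name below is hypothetical, since the actual keys are connected in `.non_ldv.prepare_computer()`:

```python
from genno import Computer

from message_ix_models.model.transport import ikarus

c = Computer()
# Configuration and upstream input keys would be added here first.
ikarus.prepare_computer(c)  # add the IKARUS tasks to the graph

# Computation happens lazily, only when a key is requested, e.g.:
# result = c.get("transport nonldv::ixmp")  # hypothetical key name
```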


def get_ikarus_data(context) -> Dict[str, pd.DataFrame]:
"""Prepare non-LDV data from :cite:`Martinsen2006`.
The data is read from from ``GEAM_TRP_techinput.xlsx``, and the processed data is
exported into ``non_LDV_techs_wrapped.csv``.
.. note:: superseded by the computations set up by :func:`prepare_computer`.
Parameters
----------
context : .Context
Returns
-------
data : dict of (str -> pandas.DataFrame)
Keys are MESSAGE parameter names such as 'input', 'fix_cost'.
Values are data frames ready for :meth:`~.Scenario.add_par`.
Years in the data include the model horizon indicated by
:attr:`.Config.base_model_info`, plus the additional year 2010.
"""
# Reference to the transport configuration
config: "Config" = context.transport
tech_info = config.spec.add.set["technology"]
info = config.base_model_info

# Merge with base model commodity information for io_units() below
# TODO this duplicates code in .ldv; move to a common location
all_info = ScenarioInfo()
all_info.set["commodity"].extend(get_codes("commodity"))
all_info.update(config.spec.add)

# Retrieve the data from the spreadsheet. Use additional output efficiency and
# investment cost factors for some bus technologies
data = read_ikarus_data(
occupancy=config.non_ldv_output, # type: ignore [attr-defined]
k_output=config.efficiency["bus output"],
k_inv_cost=config.cost["bus inv"],
)

# Create data frames to add imported params to MESSAGEix

# Vintage and active years from scenario info
# Prepend years between 2010 and *firstmodelyear* so that values are saved
missing_years = [x for x in info.set["year"] if (2010 <= x < info.y0)]
vtg_years = missing_years + info.yv_ya["year_vtg"].tolist()
act_years = missing_years + info.yv_ya["year_act"].tolist()

# Default values to be used as args in make_df()
defaults = dict(
mode="all",
year_act=act_years,
year_vtg=vtg_years,
time="year",
time_origin="year",
time_dest="year",
)
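
As a reminder of the idiom (values invented for illustration): `make_df()` from message_ix returns a frame with every column of the named parameter, broadcasting scalar keyword arguments against list-valued ones:

```python
from message_ix import make_df

# Columns node_loc, technology, year_vtg, value, unit; the scalar values
# are repeated across the three vintage years. node_loc stays empty until
# filled, e.g. via broadcast() further below.
df = make_df(
    "technical_lifetime",
    technology="ICG_bus",
    year_vtg=[2010, 2020, 2030],
    value=15.0,
    unit="year",
)
```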

# Dict of ('parameter name' -> [list of data frames])
dfs = defaultdict(list)

# Iterate over each parameter and technology
for (par, tec), group_data in data.groupby(["param", "technology"]):
# Dict including the default values to be used as args in make_df()
args = defaults.copy()
args["technology"] = tec

# Parameter-specific arguments/processing
if par == "input":
pass # Handled by input_commodity_level(), below
elif par == "output":
# Get the mode for a technology
mode = tech_info[tech_info.index(tec)].parent.id
args.update(dict(commodity=f"transport pax {mode.lower()}", level="useful"))

# Units, as an abbreviated string
_units = group_data.apply(lambda x: x.units).unique()
assert len(_units) == 1, "Units must be unique per (tec, par)"
units = _units[0]
args["unit"] = f"{units:~}"

# Create data frame with values from *args*
df = make_df(par, **args)

# Assign input commodity and level according to the technology
if par == "input":
df = input_commodity_level(context, df, default_level="final")

# Copy data into the 'value' column, by vintage year
for (year, *_), value in group_data.items():
df.loc[df["year_vtg"] == year, "value"] = value.magnitude

# Drop duplicates. For parameters with 'year_vtg' but no 'year_act' dimension,
# the same year_vtg appears multiple times because of the contents of *defaults*
df.drop_duplicates(inplace=True)

# Fill remaining values for the rest of vintage years with the last value
# registered, in this case for 2030.
df["value"] = df["value"].fillna(method="ffill")

# Convert to the model's preferred input/output units for each commodity
if par in ("input", "output"):
target_units = df.apply(
lambda row: all_info.io_units(
row["technology"], row["commodity"], row["level"]
),
axis=1,
).unique()
assert 1 == len(target_units)
else:
target_units = []

if len(target_units):
# FIXME improve convert_units() to handle more of these steps
df["value"] = convert_units(
df["value"], {"value": (1.0, units, target_units[0])}
)
df["unit"] = f"{target_units[0]:~}"

# Round up technical_lifetime values due to incompatibility in handling
# non-integer values in the GAMS code
if par == "technical_lifetime":
df["value"] = df["value"].round()

# Broadcast across all nodes
dfs[par].append(
df.pipe(broadcast, node_loc=nodes_ex_world(info.N)).pipe(same_node)
)

# Concatenate data frames for each model parameter
result = {par: pd.concat(list_of_df) for par, list_of_df in dfs.items()}

# Capacity factors all 1.0
result.update(make_matched_dfs(result["output"], capacity_factor=1.0))
result["capacity_factor"]["unit"] = ""

if context.get("debug", False):
# Directory for debug output (if any)
debug_dir = context.get_local_path("debug")
# Ensure the directory
debug_dir.mkdir(parents=True, exist_ok=True)

for name, df in result.items():
target = debug_dir.joinpath(f"ikarus-{name}.csv")
log.info(f"Dump data to {target}")
df.to_csv(target, index=False)

return result
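
Before its removal, the returned mapping would be added to a scenario along these lines (a sketch; the `context` and `scenario` objects are assumed to exist):

```python
data = get_ikarus_data(context)

# Scenario.transact() wraps check_out()/commit() around the additions
with scenario.transact("Add IKARUS non-LDV data"):
    for par_name, df in data.items():
        scenario.add_par(par_name, df)
```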
94 changes: 2 additions & 92 deletions message_ix_models/tests/model/transport/test_ikarus.py
@@ -1,105 +1,15 @@
import pandas as pd
import pytest
from iam_units import registry
from message_ix import make_df
from numpy.testing import assert_allclose
from pandas.testing import assert_series_equal

from message_ix_models.model.transport import build, ikarus, testing
from message_ix_models.model.transport import build, testing
from message_ix_models.model.transport.non_ldv import UNITS
from message_ix_models.model.transport.testing import assert_units
from message_ix_models.project.navigate import T35_POLICY


@pytest.mark.skip(reason="Deprecated, slow")
@pytest.mark.parametrize("years", ["A", "B"])
@pytest.mark.parametrize(
"regions, N_node", [("R11", 11), ("R12", 12), ("R14", 14), ("ISR", 1)]
)
def test_get_ikarus_data0(test_context, regions, N_node, years):
ctx = test_context
_, info = testing.configure_build(ctx, regions=regions, years=years)

# get_ikarus_data() succeeds on the bare RES
data = ikarus.get_ikarus_data(ctx)

# Returns a mapping
assert {
"capacity_factor",
"fix_cost",
"input",
"inv_cost",
"output",
"technical_lifetime",
} == set(data.keys())
assert all(map(lambda df: isinstance(df, pd.DataFrame), data.values()))

# Retrieve DataFrame for par e.g. 'inv_cost' and tech e.g. 'rail_pub'
inv = data["inv_cost"]
inv_rail_pub = inv[inv["technology"] == "rail_pub"]

# NB: *prep_years* accommodates the years prepended before *firstmodelyear*.
# See ikarus.py for how/why those years are prepended.
prep_years = (1 if years == "A" else 2) + len(info.Y)
# Regions × *prep_years* (inv_cost has 'year_vtg' but not 'year_act' dim)
rows_per_tech = N_node * prep_years
N_techs = 18

# Data have been loaded with the correct shape and magnitude:
assert inv_rail_pub.shape == (rows_per_tech, 5), inv_rail_pub
assert inv.shape == (rows_per_tech * N_techs, 5)

# Magnitude for year e.g. 2020
values = inv_rail_pub[inv_rail_pub["year_vtg"] == 2020]["value"]
value = values.iloc[0]
assert round(value, 3) == 3.233

# Units of each parameter have the correct dimensionality
dims = {
"capacity_factor": {}, # always dimensionless
"inv_cost": {"[currency]": 1, "[vehicle]": -1},
"fix_cost": {"[currency]": 1, "[vehicle]": -1, "[time]": -1},
"output": {"[passenger]": 1, "[vehicle]": -1},
"technical_lifetime": {"[time]": 1},
}
for par, dim in dims.items():
assert_units(data[par], dim)
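
`assert_units()` compares pint dimensionality rather than literal unit strings, so e.g. both 'USD/vehicle' and 'EUR/vehicle' would satisfy the `inv_cost` entry. A sketch of the underlying check, assuming the `iam_units` registry defines the transport units the table above implies:

```python
from iam_units import registry

q = registry("passenger / vehicle")
assert dict(q.dimensionality) == {"[passenger]": 1, "[vehicle]": -1}
```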

# Specific magnitudes of other values to check
checks = [
# commented (PNK 2022-06-17): corrected abuse of capacity_factor to include
# unrelated concepts
# dict(par="capacity_factor", year_vtg=2010, value=0.000905),
# dict(par="capacity_factor", year_vtg=2050, value=0.000886),
dict(par="technical_lifetime", year_vtg=2010, value=15.0),
dict(par="technical_lifetime", year_vtg=2050, value=15.0),
]
defaults = dict(node_loc=info.N[-1], technology="ICG_bus", time="year")

for check in checks:
# Create expected data
par_name = check.pop("par")
check["year_act"] = check["year_vtg"]
exp = make_df(par_name, **defaults, **check)
assert len(exp) == 1, "Single row for expected value"

# Use merge() to find data with matching column values
columns = sorted(set(exp.columns) - {"value", "unit"})
result = exp.merge(data[par_name], on=columns, how="inner")

# Single row matches
assert len(result) == 1, result

# Values match
assert_series_equal(
result["value_x"],
result["value_y"],
check_exact=False,
check_names=False,
atol=1e-4,
)


@build.get_computer.minimum_version
@pytest.mark.parametrize("years", ["A", "B"])
@pytest.mark.parametrize(
@@ -112,7 +22,7 @@ def test_get_ikarus_data0(test_context, regions, N_node, years):
],
)
@pytest.mark.parametrize("options", [{}, dict(navigate_scenario=T35_POLICY.TEC)])
def test_get_ikarus_data1(test_context, regions, N_node, years, options):
def test_get_ikarus_data(test_context, regions, N_node, years, options):
"""Test genno-based IKARUS data prep."""
ctx = test_context
c, info = testing.configure_build(
