From c829c75458ef88e47f6557d512d2966c3e8d071d Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sat, 30 Nov 2024 10:39:06 +0100 Subject: [PATCH 01/24] Add direction argument to History class --- src/optimagic/optimization/history.py | 6 ++++-- .../optimization/internal_optimization_problem.py | 4 ++-- tests/optimagic/optimization/test_history.py | 8 +++++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index aa9cd0bca..073bcd78c 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -4,7 +4,7 @@ import numpy as np -from optimagic.typing import EvalTask, PyTree +from optimagic.typing import Direction, EvalTask, PyTree @dataclass(frozen=True) @@ -17,7 +17,9 @@ class HistoryEntry: class History: # TODO: add counters for the relevant evaluations - def __init__(self) -> None: + def __init__(self, direction: Direction) -> None: + self.direction = direction + # Initialize lists to store the history self._params: list[PyTree] = [] self._fun: list[float | None] = [] self._time: list[float] = [] diff --git a/src/optimagic/optimization/internal_optimization_problem.py b/src/optimagic/optimization/internal_optimization_problem.py index f0951df74..2a28943c5 100644 --- a/src/optimagic/optimization/internal_optimization_problem.py +++ b/src/optimagic/optimization/internal_optimization_problem.py @@ -68,7 +68,7 @@ def __init__( self._error_handling = error_handling self._error_penalty_func = error_penalty_func self._batch_evaluator = batch_evaluator - self._history = History() + self._history = History(direction) self._linear_constraints = linear_constraints self._nonlinear_constraints = nonlinear_constraints self._logger = logger @@ -177,7 +177,7 @@ def exploration_fun( def with_new_history(self) -> Self: new = copy(self) - new._history = History() + new._history = History(self.direction) return new def with_error_handling(self, 
error_handling: ErrorHandling) -> Self: diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index bd6dae3dd..62ccc0284 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -2,7 +2,7 @@ from numpy.testing import assert_array_almost_equal as aaae from optimagic.optimization.history import History, HistoryEntry -from optimagic.typing import EvalTask +from optimagic.typing import Direction, EvalTask @pytest.fixture @@ -15,7 +15,7 @@ def history_entries(): def test_history_add_entry(history_entries): - history = History() + history = History(Direction.MINIMIZE) for entry in history_entries: history.add_entry(entry) @@ -23,15 +23,17 @@ def test_history_add_entry(history_entries): assert history.fun == [1, 2, 3] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 1, 2] + assert history.direction == Direction.MINIMIZE aaae(history.time, [0.0, 0.1, 0.2]) def test_history_add_batch(history_entries): - history = History() + history = History(Direction.MAXIMIZE) history.add_batch(history_entries) assert history.params == [[1, 2, 3], [4, 5, 6], [7, 8, 9]] assert history.fun == [1, 2, 3] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 0, 0] + assert history.direction == Direction.MAXIMIZE aaae(history.time, [0.0, 0.1, 0.2]) From 125d313f7a8a93c2ce37ee13edeaee3f8f55a2ac Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sat, 30 Nov 2024 11:25:13 +0100 Subject: [PATCH 02/24] Create 3.13 compatible envs using pre-commit hooks --- src/optimagic/optimization/history.py | 29 ++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 073bcd78c..5d9fe177e 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -17,14 +17,29 @@ class 
HistoryEntry: class History: # TODO: add counters for the relevant evaluations - def __init__(self, direction: Direction) -> None: + def __init__( + self, + direction: Direction, + params: list[PyTree] | None = None, + fun: list[float | None] | None = None, + time: list[float] | None = None, + batches: list[int] | None = None, + task: list[EvalTask] | None = None, + ) -> None: + """Initialize a history. + + The history must know the direction of the optimization problem in order to + correctly return monotone sequences. The history can be initialized empty, for + example for usage during an optimization process, or with data, for example to + recover a history from a log. + + """ self.direction = direction - # Initialize lists to store the history - self._params: list[PyTree] = [] - self._fun: list[float | None] = [] - self._time: list[float] = [] - self._batches: list[int] = [] - self._task: list[EvalTask] = [] + self._params = params if params is not None else [] + self._fun = fun if fun is not None else [] + self._time = time if time is not None else [] + self._batches = batches if batches is not None else [] + self._task = task if task is not None else [] def add_entry(self, entry: HistoryEntry, batch_id: int | None = None) -> None: if batch_id is None: From d18b63c0770cb9e0e27e69a4b791e5cf43dd6f89 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sat, 30 Nov 2024 11:52:50 +0100 Subject: [PATCH 03/24] Add attributes of HistoryArrays to History --- src/optimagic/optimization/history.py | 103 +++++++++++++++++++++++--- 1 file changed, 94 insertions(+), 9 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 5d9fe177e..1d8742597 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -1,8 +1,11 @@ import warnings from dataclasses import dataclass +from functools import partial from typing import Any import numpy as np +from numpy.typing import NDArray +from pybaum 
import get_registry, tree_just_flatten from optimagic.typing import Direction, EvalTask, PyTree @@ -34,6 +37,8 @@ def __init__( recover a history from a log. """ + if direction not in [Direction.MINIMIZE, Direction.MAXIMIZE]: + raise ValueError(f"Invalid direction: {direction}.") self.direction = direction self._params = params if params is not None else [] self._fun = fun if fun is not None else [] @@ -41,6 +46,10 @@ def __init__( self._batches = batches if batches is not None else [] self._task = task if task is not None else [] + # ================================================================================== + # Methods to add entries to the history + # ================================================================================== + def add_entry(self, entry: HistoryEntry, batch_id: int | None = None) -> None: if batch_id is None: batch_id = self._get_next_batch_id() @@ -67,34 +76,94 @@ def add_batch( for entry, id in zip(batch, ids, strict=False): self.add_entry(entry, id) + def _get_next_batch_id(self) -> int: + if not self._batches: + batch = 0 + else: + batch = self._batches[-1] + 1 + return batch + + # ================================================================================== + # Properties to access the history + # ================================================================================== + + # Function value and monotone function value + # ---------------------------------------------------------------------------------- + + @property + def fun(self) -> list[float | None]: + return self._fun + + @property + def fun_array(self) -> NDArray[np.float64]: + return np.array(self._valid_fun, dtype=np.float64) + + @property + def _valid_fun(self) -> list[float]: + return [f for f in self.fun if f is not None] + + @property + def monotone_fun(self) -> NDArray[np.float64]: + if self.direction == Direction.MINIMIZE: + return np.minimum.accumulate(self.fun_array) + elif self.direction == Direction.MAXIMIZE: + return 
np.maximum.accumulate(self.fun_array) + + # Acceptance + # ---------------------------------------------------------------------------------- + + @property + def is_accepted(self) -> NDArray[np.bool_]: + if self.direction == Direction.MINIMIZE: + return self.fun_array <= self.monotone_fun + elif self.direction == Direction.MAXIMIZE: + return self.fun_array >= self.monotone_fun + + # Parameters + # ---------------------------------------------------------------------------------- + @property def params(self) -> list[PyTree]: return self._params @property - def fun(self) -> list[float | None]: - return self._fun + def flat_params_array(self) -> NDArray[np.float64]: + return np.array(_flatten_params(self._valid_params), dtype=np.float64) + + @property + def _valid_params(self) -> list[PyTree]: + return [p for p, f in zip(self.params, self.fun, strict=True) if f is not None] + + # Time + # ---------------------------------------------------------------------------------- @property def time(self) -> list[float]: arr = np.array(self._time) return (arr - arr[0]).tolist() + @property + def time_array(self) -> NDArray[np.float64]: + return np.array(self._valid_time, dtype=np.float64) + + @property + def _valid_time(self) -> list[float]: + return [t for t, f in zip(self.time, self.fun, strict=True) if f is not None] + + # Batches + # ---------------------------------------------------------------------------------- + @property def batches(self) -> list[int]: return self._batches + # Tasks + # ---------------------------------------------------------------------------------- + @property def task(self) -> list[EvalTask]: return self._task - def _get_next_batch_id(self) -> int: - if not self._batches: - batch = 0 - else: - batch = self._batches[-1] + 1 - return batch - # ================================================================================== # Add deprecated dict access # ================================================================================== @@ -115,3 
+184,19 @@ def __getitem__(self, key: str) -> Any: msg = "dict-like access to History is deprecated. Use attribute access instead." warnings.warn(msg, FutureWarning) return getattr(self, key) + + +def _flatten_params(valid_params: list[PyTree]) -> list[list[float]]: + is_flat = ( + len(valid_params) > 0 + and isinstance(valid_params[0], np.ndarray) + and valid_params[0].ndim == 1 + ) + + if is_flat: + flatten = lambda x: x.tolist() + else: + registry = get_registry(extended=True) + flatten = partial(tree_just_flatten, registry=registry) + + return [flatten(p) for p in valid_params] From 7ad2edd2ae23aa9628e6b2172a67a8aafed1e27e Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sat, 30 Nov 2024 13:34:52 +0100 Subject: [PATCH 04/24] Use History in convergence report creation --- .../optimization/convergence_report.py | 18 ++++---- src/optimagic/optimization/history.py | 3 +- src/optimagic/optimization/process_results.py | 17 +++---- .../optimization/test_convergence_report.py | 44 +++++++++---------- 4 files changed, 39 insertions(+), 43 deletions(-) diff --git a/src/optimagic/optimization/convergence_report.py b/src/optimagic/optimization/convergence_report.py index bb44a828b..5b039fb58 100644 --- a/src/optimagic/optimization/convergence_report.py +++ b/src/optimagic/optimization/convergence_report.py @@ -1,16 +1,14 @@ import numpy as np +from numpy.typing import NDArray -from optimagic.optimization.history_tools import get_history_arrays +from optimagic.optimization.history import History -def get_convergence_report(history, direction): - history_arrs = get_history_arrays( - history=history, - direction=direction, - ) +def get_convergence_report(history: History) -> dict[str, dict[str, float]] | None: + is_accepted = history.is_accepted - critvals = history_arrs.fun[history_arrs.is_accepted] - params = history_arrs.params[history_arrs.is_accepted] + critvals = history.fun_array[is_accepted] + params = history.flat_params_array[is_accepted] if len(critvals) < 2: out 
= None @@ -35,7 +33,7 @@ def get_convergence_report(history, direction): return out -def _get_max_f_changes(critvals): +def _get_max_f_changes(critvals: NDArray[np.float64]) -> tuple[float, float]: best_val = critvals[-1] worst_val = critvals[0] @@ -47,7 +45,7 @@ def _get_max_f_changes(critvals): return max_change_rel, max_change_abs -def _get_max_x_changes(params): +def _get_max_x_changes(params: NDArray[np.float64]) -> tuple[float, float]: best_x = params[-1] diffs = params - best_x denom = np.clip(np.abs(best_x), 0.1, np.inf) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 1d8742597..3c8e94867 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -5,8 +5,9 @@ import numpy as np from numpy.typing import NDArray -from pybaum import get_registry, tree_just_flatten +from optimagic.parameters.tree_conversion import tree_just_flatten +from optimagic.parameters.tree_registry import get_registry from optimagic.typing import Direction, EvalTask, PyTree diff --git a/src/optimagic/optimization/process_results.py b/src/optimagic/optimization/process_results.py index 0817649f5..c77acf665 100644 --- a/src/optimagic/optimization/process_results.py +++ b/src/optimagic/optimization/process_results.py @@ -5,6 +5,7 @@ from optimagic.optimization.algorithm import InternalOptimizeResult from optimagic.optimization.convergence_report import get_convergence_report +from optimagic.optimization.history import History from optimagic.optimization.optimize_result import MultistartInfo, OptimizeResult from optimagic.parameters.conversion import Converter from optimagic.typing import AggregationLevel, Direction, PyTree @@ -41,9 +42,7 @@ def process_single_result( fun = -fun if raw_res.history is not None: - conv_report = get_convergence_report( - history=raw_res.history, direction=extra_fields.direction - ) + conv_report = get_convergence_report(raw_res.history) else: conv_report = None @@ -109,15 
+108,13 @@ def process_multistart_result( # create a convergence report for the multistart optimization; This is not # the same as the convergence report for the individual local optimizations. # ============================================================================== - crit_hist = [opt.fun for opt in info.local_optima] - params_hist = [opt.params for opt in info.local_optima] - time_hist = [np.nan for opt in info.local_optima] - hist = {"criterion": crit_hist, "params": params_hist, "runtime": time_hist} - - conv_report = get_convergence_report( - history=hist, + report_history = History( direction=extra_fields.direction, + fun=[opt.fun for opt in info.local_optima], + params=[opt.params for opt in info.local_optima], + time=[np.nan for _ in info.local_optima], ) + conv_report = get_convergence_report(report_history) res.convergence_report = conv_report diff --git a/tests/optimagic/optimization/test_convergence_report.py b/tests/optimagic/optimization/test_convergence_report.py index 058698a58..823ef629a 100644 --- a/tests/optimagic/optimization/test_convergence_report.py +++ b/tests/optimagic/optimization/test_convergence_report.py @@ -3,46 +3,46 @@ from numpy.testing import assert_array_almost_equal as aaae from optimagic.optimization.convergence_report import get_convergence_report +from optimagic.optimization.history import History from optimagic.typing import Direction def test_get_convergence_report_minimize(): - hist = { - "criterion": [5, 4.1, 4.4, 4.0], - "params": [{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], - "runtime": [0, 1, 2, 3], - } - - calculated = pd.DataFrame.from_dict( - get_convergence_report(hist, Direction.MINIMIZE) + hist = History( + direction=Direction.MINIMIZE, + params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], + fun=[5, 4.1, 4.4, 4.0], + time=[0, 1, 2, 3], ) + calculated = pd.DataFrame.from_dict(get_convergence_report(hist)) + expected = np.array([[0.025, 0.25], [0.05, 1.0], [0.1, 1], [0.1, 2.0]]) 
aaae(calculated.to_numpy(), expected) def test_get_convergence_report_maximize(): - hist = { - "criterion": [-5, -4.1, -4.4, -4.0], - "params": [{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], - "runtime": [0, 1, 2, 3], - } - - calculated = pd.DataFrame.from_dict( - get_convergence_report(hist, Direction.MAXIMIZE) + hist = History( + direction=Direction.MAXIMIZE, + params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], + fun=[-5, -4.1, -4.4, -4.0], + time=[0, 1, 2, 3], ) + calculated = pd.DataFrame.from_dict(get_convergence_report(hist)) + expected = np.array([[0.025, 0.25], [0.05, 1.0], [0.1, 1], [0.1, 2.0]]) aaae(calculated.to_numpy(), expected) def test_history_is_too_short(): # first value is best, so history of accepted parameters has only one entry - hist = { - "criterion": [5, -4.1, -4.4, -4.0], - "params": [{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], - "runtime": [0, 1, 2, 3], - } + hist = History( + direction=Direction.MAXIMIZE, + params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], + fun=[5, 4.1, 4.4, 4.0], + time=[0, 1, 2, 3], + ) - calculated = get_convergence_report(hist, Direction.MAXIMIZE) + calculated = get_convergence_report(hist) assert calculated is None From bf6cbe6435785b1e671d23fdc96bf69e9362ee00 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sat, 30 Nov 2024 14:14:51 +0100 Subject: [PATCH 05/24] Use History and not HistoryArrays in history_plots.py --- src/optimagic/visualization/history_plots.py | 47 ++++++++++++-------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/src/optimagic/visualization/history_plots.py b/src/optimagic/visualization/history_plots.py index 4c4797b53..f4514368b 100644 --- a/src/optimagic/visualization/history_plots.py +++ b/src/optimagic/visualization/history_plots.py @@ -10,7 +10,7 @@ from optimagic.config import PLOTLY_PALETTE, PLOTLY_TEMPLATE from optimagic.logging.logger import LogReader, SQLiteLogOptions from optimagic.optimization.algorithm import Algorithm -from 
optimagic.optimization.history_tools import get_history_arrays +from optimagic.optimization.history import History from optimagic.optimization.optimize_result import OptimizeResult from optimagic.parameters.tree_registry import get_registry from optimagic.typing import Direction @@ -59,7 +59,7 @@ def criterion_plot( palette = [palette] palette = itertools.cycle(palette) - key = "monotone_criterion" if monotone else "criterion" + fun_or_monotone_fun = "monotone_fun" if monotone else "fun" # ================================================================================== # Extract plotting data from results objects / data base @@ -103,9 +103,7 @@ def criterion_plot( } for i, local_history in enumerate(data[0]["local_histories"]): - history = get_history_arrays( - local_history, Direction(data[0]["direction"]) - )[key] + history = getattr(local_history, fun_or_monotone_fun) if max_evaluations is not None and len(history) > max_evaluations: history = history[:max_evaluations] @@ -128,7 +126,8 @@ def criterion_plot( _history = _data["stacked_local_histories"] else: _history = _data["history"] - history = get_history_arrays(_history, _data["direction"])[key] + + history = getattr(_history, fun_or_monotone_fun) if max_evaluations is not None and len(history) > max_evaluations: history = history[:max_evaluations] @@ -253,7 +252,7 @@ def params_plot( raise TypeError("result must be an OptimizeResult or a path to a log file.") if data["stacked_local_histories"] is not None: - history = data["stacked_local_histories"]["params"] + history = data["stacked_local_histories"].params else: history = data["history"].params @@ -338,14 +337,13 @@ def _extract_plotting_data_from_results_object( local_histories = None if stack_multistart and local_histories is not None: - stacked = _get_stacked_local_histories(local_histories) + stacked = _get_stacked_local_histories(local_histories, res.direction) if show_exploration: - stacked["params"] = ( - 
res.multistart_info.exploration_sample[::-1] + stacked["params"] - ) - stacked["criterion"] = ( - res.multistart_info.exploration_results.tolist()[::-1] - + stacked["criterion"] + stacked = History( + direction=stacked.direction, + fun=res.multistart_info.exploration_results.tolist()[::-1] + + stacked.fun, + params=res.multistart_info.exploration_sample[::-1] + stacked.params, ) else: stacked = None @@ -387,16 +385,23 @@ def _extract_plotting_data_from_database(res, stack_multistart, show_exploration direction = _problem_table["direction"].tolist()[-1] - history, local_histories, exploration = reader.read_multistart_history(direction) + _history, local_histories, exploration = reader.read_multistart_history(direction) if stack_multistart and local_histories is not None: - stacked = _get_stacked_local_histories(local_histories, history) + stacked = _get_stacked_local_histories(local_histories, direction, _history) if show_exploration: stacked["params"] = exploration["params"][::-1] + stacked["params"] stacked["criterion"] = exploration["criterion"][::-1] + stacked["criterion"] else: stacked = None + history = History( + direction=direction, + fun=_history["fun"], + params=_history["params"], + time=_history["time"], + ) + data = { "history": history, "direction": direction, @@ -408,7 +413,7 @@ def _extract_plotting_data_from_database(res, stack_multistart, show_exploration return data -def _get_stacked_local_histories(local_histories, history=None): +def _get_stacked_local_histories(local_histories, direction, history=None): """Stack local histories. Local histories is a list of dictionaries, each of the same structure. 
We transform @@ -427,4 +432,10 @@ def _get_stacked_local_histories(local_histories, history=None): stacked["criterion"].extend(history.fun) stacked["params"].extend(history.params) stacked["runtime"].extend(history.time) - return stacked + + return History( + direction=direction, + fun=stacked["criterion"], + params=stacked["params"], + time=stacked["runtime"], + ) From e6b6066c844ca45b0fb602600a6146d5b4150154 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sat, 30 Nov 2024 14:16:22 +0100 Subject: [PATCH 06/24] Remove history_tools.py --- src/optimagic/optimization/history_tools.py | 88 ------------------- .../optimization/test_history_tools.py | 27 ------ 2 files changed, 115 deletions(-) delete mode 100644 src/optimagic/optimization/history_tools.py delete mode 100644 tests/optimagic/optimization/test_history_tools.py diff --git a/src/optimagic/optimization/history_tools.py b/src/optimagic/optimization/history_tools.py deleted file mode 100644 index 770f1930e..000000000 --- a/src/optimagic/optimization/history_tools.py +++ /dev/null @@ -1,88 +0,0 @@ -from dataclasses import dataclass -from functools import partial - -import numpy as np -from numpy.typing import NDArray -from pybaum import tree_just_flatten - -from optimagic.optimization.history import History -from optimagic.parameters.tree_registry import get_registry -from optimagic.typing import Direction - - -@dataclass(frozen=True) -class HistoryArrays: - fun: NDArray[np.float64] - params: NDArray[np.float64] - time: NDArray[np.float64] - monotone_fun: NDArray[np.float64] - is_accepted: NDArray[np.bool_] - - @property - def criterion(self) -> NDArray[np.float64]: - return self.fun - - @property - def monotone_criterion(self) -> NDArray[np.float64]: - return self.monotone_fun - - def __getitem__(self, key: str) -> NDArray[np.float64] | NDArray[np.bool_]: - return getattr(self, key) - - -def get_history_arrays(history: History, direction: Direction) -> HistoryArrays: - # 
================================================================================== - # Handle deprecations for now - # ================================================================================== - assert direction in [Direction.MINIMIZE, Direction.MAXIMIZE] - - if isinstance(history, dict): - parhist = history["params"] - funhist = history["criterion"] - timehist = history["runtime"] - - else: - parhist = history.params - funhist = history.fun - timehist = history.time - - # ================================================================================== - # Filter out evaluations that do not have a `fun` value - # ================================================================================== - - parhist = [p for p, f in zip(parhist, funhist, strict=False) if f is not None] - timehist = [t for t, f in zip(timehist, funhist, strict=False) if f is not None] - funhist = [f for f in funhist if f is not None] - - # ================================================================================== - - is_flat = ( - len(parhist) > 0 and isinstance(parhist[0], np.ndarray) and parhist[0].ndim == 1 - ) - if is_flat: - to_internal = lambda x: x.tolist() - else: - registry = get_registry(extended=True) - to_internal = partial(tree_just_flatten, registry=registry) - - critvals = np.array(funhist) - - params = np.array([to_internal(p) for p in parhist]) - - runtimes = np.array(timehist) - - if direction == Direction.MINIMIZE: - monotone = np.minimum.accumulate(critvals) - is_accepted = critvals <= monotone - elif direction == Direction.MAXIMIZE: - monotone = np.maximum.accumulate(critvals) - is_accepted = critvals >= monotone - - out = HistoryArrays( - fun=critvals, - params=params, - time=runtimes, - monotone_fun=monotone, - is_accepted=is_accepted, - ) - return out diff --git a/tests/optimagic/optimization/test_history_tools.py b/tests/optimagic/optimization/test_history_tools.py deleted file mode 100644 index 4b4f4d100..000000000 --- 
a/tests/optimagic/optimization/test_history_tools.py +++ /dev/null @@ -1,27 +0,0 @@ -import numpy as np -import pytest -from numpy.testing import assert_array_almost_equal as aaae - -from optimagic.optimization.history_tools import get_history_arrays -from optimagic.typing import Direction - - -@pytest.fixture() -def history(): - hist = { - "criterion": [5, 4, 5.5, 4.2], - "params": [{"a": 0}, {"a": 1}, {"a": 2}, {"a": 3}], - "runtime": [0, 1, 2, 3], - } - return hist - - -def test_get_history_arrays_minimize(history): - calculated = get_history_arrays(history, Direction.MINIMIZE) - - aaae(calculated.is_accepted, np.array([True, True, False, False])) - - -def test_get_history_arrays_maximize(history): - calculated = get_history_arrays(history, Direction.MAXIMIZE) - aaae(calculated.is_accepted, np.array([True, False, True, False])) From d8fefc1cac82aecbd95a35b1bf8a2d9bc1e5bb0c Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sat, 30 Nov 2024 14:29:08 +0100 Subject: [PATCH 07/24] Update error message for direction init argument in History class --- src/optimagic/optimization/history.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 3c8e94867..2e46d9d0d 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -1,7 +1,7 @@ import warnings from dataclasses import dataclass from functools import partial -from typing import Any +from typing import Any, Literal import numpy as np from numpy.typing import NDArray @@ -23,7 +23,7 @@ class History: # TODO: add counters for the relevant evaluations def __init__( self, - direction: Direction, + direction: Direction | Literal["minimize", "maximize"], params: list[PyTree] | None = None, fun: list[float | None] | None = None, time: list[float] | None = None, @@ -38,9 +38,13 @@ def __init__( recover a history from a log. 
""" - if direction not in [Direction.MINIMIZE, Direction.MAXIMIZE]: - raise ValueError(f"Invalid direction: {direction}.") - self.direction = direction + try: + self.direction = Direction(direction) + except ValueError: + valid_options = list(Direction.__members__.values()) + msg = f"Invalid direction: '{direction}'. Choose from {valid_options}." + raise ValueError(msg) from None + self._params = params if params is not None else [] self._fun = fun if fun is not None else [] self._time = time if time is not None else [] From 6a991af47cd658362f064e994f8c6fb64b54817b Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sat, 30 Nov 2024 16:54:17 +0100 Subject: [PATCH 08/24] Add additional tests for history. --- tests/optimagic/optimization/test_history.py | 81 +++++++++++++++++--- 1 file changed, 72 insertions(+), 9 deletions(-) diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index 62ccc0284..9c1493401 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -1,5 +1,7 @@ +import numpy as np import pytest from numpy.testing import assert_array_almost_equal as aaae +from numpy.testing import assert_array_equal from optimagic.optimization.history import History, HistoryEntry from optimagic.typing import Direction, EvalTask @@ -8,9 +10,9 @@ @pytest.fixture def history_entries(): return [ - HistoryEntry(params=[1, 2, 3], fun=1, time=0.1, task=EvalTask.FUN), - HistoryEntry(params=[4, 5, 6], fun=2, time=0.2, task=EvalTask.FUN), - HistoryEntry(params=[7, 8, 9], fun=3, time=0.3, task=EvalTask.FUN), + HistoryEntry(params={"a": 1, "b": [2, 3]}, fun=1, time=0.1, task=EvalTask.FUN), + HistoryEntry(params={"a": 4, "b": [5, 6]}, fun=3, time=0.2, task=EvalTask.FUN), + HistoryEntry(params={"a": 7, "b": [8, 9]}, fun=2, time=0.3, task=EvalTask.FUN), ] @@ -19,21 +21,82 @@ def test_history_add_entry(history_entries): for entry in history_entries: history.add_entry(entry) - assert 
history.params == [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - assert history.fun == [1, 2, 3] + assert history.direction == Direction.MINIMIZE + + assert history.params == [ + {"a": 1, "b": [2, 3]}, + {"a": 4, "b": [5, 6]}, + {"a": 7, "b": [8, 9]}, + ] + assert history.fun == [1, 3, 2] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 1, 2] - assert history.direction == Direction.MINIMIZE aaae(history.time, [0.0, 0.1, 0.2]) + assert_array_equal(history.fun_array, np.array([1, 3, 2], dtype=np.float64)) + assert_array_equal(history.monotone_fun, np.array([1, 1, 1], dtype=np.float64)) + assert_array_equal( + history.flat_params_array, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + ) + aaae(history.time_array, np.array([0.0, 0.1, 0.2])) + def test_history_add_batch(history_entries): history = History(Direction.MAXIMIZE) history.add_batch(history_entries) - assert history.params == [[1, 2, 3], [4, 5, 6], [7, 8, 9]] - assert history.fun == [1, 2, 3] + assert history.direction == Direction.MAXIMIZE + + assert history.params == [ + {"a": 1, "b": [2, 3]}, + {"a": 4, "b": [5, 6]}, + {"a": 7, "b": [8, 9]}, + ] + assert history.fun == [1, 3, 2] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 0, 0] - assert history.direction == Direction.MAXIMIZE aaae(history.time, [0.0, 0.1, 0.2]) + + assert_array_equal(history.fun_array, np.array([1, 3, 2], dtype=np.float64)) + assert_array_equal(history.monotone_fun, np.array([1, 3, 3], dtype=np.float64)) + assert_array_equal( + history.flat_params_array, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + ) + aaae(history.time_array, np.array([0.0, 0.1, 0.2])) + + +def test_history_from_data(): + params = [{"a": 1, "b": [2, 3]}, {"a": 4, "b": [5, 6]}, {"a": 7, "b": [8, 9]}] + fun = [1, 3, 2] + task = [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] + batches = [0, 0, 0] + time = [0.0, 0.1, 0.2] + + history = History( + direction=Direction.MAXIMIZE, + fun=fun, 
+ params=params, + task=task, + batches=batches, + time=time, + ) + + assert history.direction == Direction.MAXIMIZE + + assert history.params == params + assert history.fun == fun + assert history.task == task + assert history.batches == batches + aaae(history.time, time) + + assert_array_equal(history.fun_array, np.array(fun, dtype=np.float64)) + assert_array_equal(history.monotone_fun, np.array([1, 3, 3], dtype=np.float64)) + assert_array_equal( + history.flat_params_array, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + ) + aaae(history.time_array, np.array(time)) + + +def test_history_invalid_direction(): + with pytest.raises(ValueError, match="Invalid direction: 'invalid'. Choose from"): + History("invalid") From b4a186dbfb942a54516ac36d694241a9f0ff53b8 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sat, 30 Nov 2024 18:59:18 +0100 Subject: [PATCH 09/24] Refactor History class --- .../optimization/convergence_report.py | 4 +- src/optimagic/optimization/history.py | 163 ++++++++++++------ src/optimagic/optimization/process_results.py | 2 +- src/optimagic/visualization/history_plots.py | 4 +- .../optimization/test_convergence_report.py | 6 +- tests/optimagic/optimization/test_history.py | 7 +- 6 files changed, 121 insertions(+), 65 deletions(-) diff --git a/src/optimagic/optimization/convergence_report.py b/src/optimagic/optimization/convergence_report.py index 5b039fb58..02fa76c4f 100644 --- a/src/optimagic/optimization/convergence_report.py +++ b/src/optimagic/optimization/convergence_report.py @@ -7,8 +7,8 @@ def get_convergence_report(history: History) -> dict[str, dict[str, float]] | None: is_accepted = history.is_accepted - critvals = history.fun_array[is_accepted] - params = history.flat_params_array[is_accepted] + critvals = np.array(history.fun, dtype=np.float64)[is_accepted] + params = np.array(history.flat_params, dtype=np.float64)[is_accepted] if len(critvals) < 2: out = None diff --git a/src/optimagic/optimization/history.py 
b/src/optimagic/optimization/history.py index 2e46d9d0d..e8047dcc6 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -1,12 +1,13 @@ import warnings from dataclasses import dataclass from functools import partial -from typing import Any, Literal +from typing import Any import numpy as np +import pandas as pd from numpy.typing import NDArray +from pybaum import leaf_names, tree_just_flatten -from optimagic.parameters.tree_conversion import tree_just_flatten from optimagic.parameters.tree_registry import get_registry from optimagic.typing import Direction, EvalTask, PyTree @@ -19,14 +20,18 @@ class HistoryEntry: task: EvalTask +class CostModel: + pass + + class History: # TODO: add counters for the relevant evaluations def __init__( self, - direction: Direction | Literal["minimize", "maximize"], + direction: Direction, params: list[PyTree] | None = None, fun: list[float | None] | None = None, - time: list[float] | None = None, + start_time: list[float] | None = None, batches: list[int] | None = None, task: list[EvalTask] | None = None, ) -> None: @@ -38,16 +43,10 @@ def __init__( recover a history from a log. """ - try: - self.direction = Direction(direction) - except ValueError: - valid_options = list(Direction.__members__.values()) - msg = f"Invalid direction: '{direction}'. Choose from {valid_options}." 
- raise ValueError(msg) from None - + self.direction = direction self._params = params if params is not None else [] self._fun = fun if fun is not None else [] - self._time = time if time is not None else [] + self._start_time = start_time if start_time is not None else [] self._batches = batches if batches is not None else [] self._task = task if task is not None else [] @@ -60,7 +59,7 @@ def add_entry(self, entry: HistoryEntry, batch_id: int | None = None) -> None: batch_id = self._get_next_batch_id() self._params.append(entry.params) self._fun.append(entry.fun) - self._time.append(entry.time) + self._start_time.append(entry.time) self._batches.append(batch_id) self._task.append(entry.task) @@ -95,65 +94,84 @@ def _get_next_batch_id(self) -> int: # Function value and monotone function value # ---------------------------------------------------------------------------------- + def fun_data(self, cost_model: CostModel, monotone: bool) -> pd.DataFrame: + """Return the function value data. + + Args: + cost_model: The cost model that is used to calculate the time measure. + monotone: Whether to return the monotone function values. + + Returns: + pd.DataFrame: The function value data. The columns are: 'task', 'time' and + 'value'. If monotone is False, value is the fun value, otherwise the + monotone function value. + + """ + @property def fun(self) -> list[float | None]: return self._fun @property - def fun_array(self) -> NDArray[np.float64]: - return np.array(self._valid_fun, dtype=np.float64) + def monotone_fun(self) -> NDArray[np.float64]: + """The monotone function value of the history. - @property - def _valid_fun(self) -> list[float]: - return [f for f in self.fun if f is not None] + If the value is None, the output at that position is nan. 
- @property - def monotone_fun(self) -> NDArray[np.float64]: - if self.direction == Direction.MINIMIZE: - return np.minimum.accumulate(self.fun_array) - elif self.direction == Direction.MAXIMIZE: - return np.maximum.accumulate(self.fun_array) + """ + return _calculate_monotone_sequence(self.fun, direction=self.direction) # Acceptance # ---------------------------------------------------------------------------------- @property def is_accepted(self) -> NDArray[np.bool_]: + """Boolean indicator whether a function value is accepted. + + A function value is accepted if it is smaller (or equal) than the monotone + function value counterpart in the case of minimization, or larger (or equal) in + the case of maximization. If the value is None, the output at that position is + False. + + """ + fun_arr = np.array(self.fun, dtype=np.float64) if self.direction == Direction.MINIMIZE: - return self.fun_array <= self.monotone_fun + return fun_arr <= self.monotone_fun elif self.direction == Direction.MAXIMIZE: - return self.fun_array >= self.monotone_fun + return fun_arr >= self.monotone_fun # Parameters # ---------------------------------------------------------------------------------- + def params_data(self, cost_model: CostModel) -> pd.DataFrame: + """Return the parameter data. + + Args: + cost_model: The cost model that is used to calculate the time measure. + + Returns: + pd.DataFrame: The parameter data. The columns are: 'name' (the parameter + names), 'task', 'time' and 'value' (the parameter values). 
+ + """ + @property def params(self) -> list[PyTree]: return self._params @property - def flat_params_array(self) -> NDArray[np.float64]: - return np.array(_flatten_params(self._valid_params), dtype=np.float64) + def flat_params(self) -> list[list[float]]: + return _get_flat_params(self._params) @property - def _valid_params(self) -> list[PyTree]: - return [p for p, f in zip(self.params, self.fun, strict=True) if f is not None] + def flat_param_names(self) -> list[str]: + return _get_flat_param_names(self._params) # Time # ---------------------------------------------------------------------------------- - @property - def time(self) -> list[float]: - arr = np.array(self._time) - return (arr - arr[0]).tolist() - - @property - def time_array(self) -> NDArray[np.float64]: - return np.array(self._valid_time, dtype=np.float64) - - @property - def _valid_time(self) -> list[float]: - return [t for t, f in zip(self.time, self.fun, strict=True) if f is not None] + def get_time(self, cost_model: CostModel) -> list[float]: + pass # Batches # ---------------------------------------------------------------------------------- @@ -173,6 +191,16 @@ def task(self) -> list[EvalTask]: # Add deprecated dict access # ================================================================================== + @property + def time(self) -> list[float]: + msg = ( + "The attribute `time` of History will be deprecated soon. Use the " + f"`{self.get_time.__name__}` method instead." + ) + warnings.warn(msg, FutureWarning) + arr = np.array(self._start_time) + return (arr - arr[0]).tolist() + @property def criterion(self) -> list[float | None]: msg = "The attribute `criterion` of History is deprecated. Use `fun` instead." @@ -181,7 +209,10 @@ def criterion(self) -> list[float | None]: @property def runtime(self) -> list[float]: - msg = "The attribute `runtime` of History is deprecated. Use `time` instead." + msg = ( + "The attribute `runtime` of History will be deprecated soon. 
Use the " + f"`{self.get_time.__name__}` method instead." + ) warnings.warn(msg, FutureWarning) return self.time @@ -191,17 +222,47 @@ def __getitem__(self, key: str) -> Any: return getattr(self, key) -def _flatten_params(valid_params: list[PyTree]) -> list[list[float]]: - is_flat = ( - len(valid_params) > 0 - and isinstance(valid_params[0], np.ndarray) - and valid_params[0].ndim == 1 - ) +# ====================================================================================== +# Methods +# ====================================================================================== + - if is_flat: +def _get_flat_params(params: list[PyTree]) -> list[list[float]]: + if len(params) > 0 and _is_1d_numpy_array(params[0]): + # fast path flatten = lambda x: x.tolist() else: registry = get_registry(extended=True) flatten = partial(tree_just_flatten, registry=registry) - return [flatten(p) for p in valid_params] + return [flatten(p) for p in params] + + +def _get_flat_param_names(params: list[PyTree]) -> list[str]: + if _is_1d_numpy_array(params[0]): + # fast path + return np.arange(params[0].size).astype(str).tolist() + + registry = get_registry(extended=True) + return leaf_names(params[0], registry=registry) + + +def _is_1d_numpy_array(param: PyTree) -> bool: + return isinstance(param, np.ndarray) and param.ndim == 1 + + +def _calculate_monotone_sequence( + sequence: list[float | None], direction: Direction +) -> NDArray[np.float64]: + sequence_arr = np.array(sequence, dtype=np.float64) # converts None to nan + none_mask = np.isnan(sequence_arr) + + if direction == Direction.MINIMIZE: + sequence_arr[none_mask] = np.inf + out = np.minimum.accumulate(sequence_arr) + elif direction == Direction.MAXIMIZE: + sequence_arr[none_mask] = -np.inf + out = np.maximum.accumulate(sequence_arr) + + out[none_mask] = np.nan + return out diff --git a/src/optimagic/optimization/process_results.py b/src/optimagic/optimization/process_results.py index c77acf665..a9b1b5946 100644 --- 
a/src/optimagic/optimization/process_results.py +++ b/src/optimagic/optimization/process_results.py @@ -112,7 +112,7 @@ def process_multistart_result( direction=extra_fields.direction, fun=[opt.fun for opt in info.local_optima], params=[opt.params for opt in info.local_optima], - time=[np.nan for _ in info.local_optima], + start_time=[np.nan for _ in info.local_optima], ) conv_report = get_convergence_report(report_history) diff --git a/src/optimagic/visualization/history_plots.py b/src/optimagic/visualization/history_plots.py index f4514368b..933e1f8d7 100644 --- a/src/optimagic/visualization/history_plots.py +++ b/src/optimagic/visualization/history_plots.py @@ -399,7 +399,7 @@ def _extract_plotting_data_from_database(res, stack_multistart, show_exploration direction=direction, fun=_history["fun"], params=_history["params"], - time=_history["time"], + start_time=_history["time"], ) data = { @@ -437,5 +437,5 @@ def _get_stacked_local_histories(local_histories, direction, history=None): direction=direction, fun=stacked["criterion"], params=stacked["params"], - time=stacked["runtime"], + start_time=stacked["runtime"], ) diff --git a/tests/optimagic/optimization/test_convergence_report.py b/tests/optimagic/optimization/test_convergence_report.py index 823ef629a..23bcaba3e 100644 --- a/tests/optimagic/optimization/test_convergence_report.py +++ b/tests/optimagic/optimization/test_convergence_report.py @@ -12,7 +12,7 @@ def test_get_convergence_report_minimize(): direction=Direction.MINIMIZE, params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], fun=[5, 4.1, 4.4, 4.0], - time=[0, 1, 2, 3], + start_time=[0, 1, 2, 3], ) calculated = pd.DataFrame.from_dict(get_convergence_report(hist)) @@ -26,7 +26,7 @@ def test_get_convergence_report_maximize(): direction=Direction.MAXIMIZE, params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], fun=[-5, -4.1, -4.4, -4.0], - time=[0, 1, 2, 3], + start_time=[0, 1, 2, 3], ) calculated = pd.DataFrame.from_dict(get_convergence_report(hist)) 
@@ -41,7 +41,7 @@ def test_history_is_too_short(): direction=Direction.MAXIMIZE, params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], fun=[5, 4.1, 4.4, 4.0], - time=[0, 1, 2, 3], + start_time=[0, 1, 2, 3], ) calculated = get_convergence_report(hist) diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index 9c1493401..4c0c0af08 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -78,7 +78,7 @@ def test_history_from_data(): params=params, task=task, batches=batches, - time=time, + start_time=time, ) assert history.direction == Direction.MAXIMIZE @@ -95,8 +95,3 @@ def test_history_from_data(): history.flat_params_array, np.arange(1, 10, dtype=np.float64).reshape(3, 3) ) aaae(history.time_array, np.array(time)) - - -def test_history_invalid_direction(): - with pytest.raises(ValueError, match="Invalid direction: 'invalid'. Choose from"): - History("invalid") From 0b7cfe0334d8302098c38cb487afd37bb44490e9 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sun, 1 Dec 2024 11:15:33 +0100 Subject: [PATCH 10/24] Fix tests for refactored History class --- src/optimagic/optimization/history.py | 9 +-- tests/optimagic/optimization/test_history.py | 61 +++++++++++--------- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index e8047dcc6..d30c01939 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -8,6 +8,7 @@ from numpy.typing import NDArray from pybaum import leaf_names, tree_just_flatten +from optimagic.optimization.cost_model import CostModel from optimagic.parameters.tree_registry import get_registry from optimagic.typing import Direction, EvalTask, PyTree @@ -20,10 +21,6 @@ class HistoryEntry: task: EvalTask -class CostModel: - pass - - class History: # TODO: add counters for the relevant evaluations def __init__( @@ -266,3 
+263,7 @@ def _calculate_monotone_sequence( out[none_mask] = np.nan return out + + +def _get_time(history: History, cost_model: CostModel) -> list[float]: + pass diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index 4c0c0af08..e8a1f7215 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -6,6 +6,10 @@ from optimagic.optimization.history import History, HistoryEntry from optimagic.typing import Direction, EvalTask +# ====================================================================================== +# Test histories add entries and batches methods +# ====================================================================================== + @pytest.fixture def history_entries(): @@ -31,14 +35,13 @@ def test_history_add_entry(history_entries): assert history.fun == [1, 3, 2] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 1, 2] - aaae(history.time, [0.0, 0.1, 0.2]) + with pytest.warns(FutureWarning): + aaae(history.time, [0.0, 0.1, 0.2]) - assert_array_equal(history.fun_array, np.array([1, 3, 2], dtype=np.float64)) assert_array_equal(history.monotone_fun, np.array([1, 1, 1], dtype=np.float64)) assert_array_equal( - history.flat_params_array, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + history.flat_params, np.arange(1, 10, dtype=np.float64).reshape(3, 3) ) - aaae(history.time_array, np.array([0.0, 0.1, 0.2])) def test_history_add_batch(history_entries): @@ -55,43 +58,47 @@ def test_history_add_batch(history_entries): assert history.fun == [1, 3, 2] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 0, 0] - aaae(history.time, [0.0, 0.1, 0.2]) + with pytest.warns(FutureWarning): + aaae(history.time, [0.0, 0.1, 0.2]) - assert_array_equal(history.fun_array, np.array([1, 3, 2], dtype=np.float64)) assert_array_equal(history.monotone_fun, np.array([1, 3, 3], 
dtype=np.float64)) assert_array_equal( - history.flat_params_array, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + history.flat_params, np.arange(1, 10, dtype=np.float64).reshape(3, 3) ) - aaae(history.time_array, np.array([0.0, 0.1, 0.2])) -def test_history_from_data(): - params = [{"a": 1, "b": [2, 3]}, {"a": 4, "b": [5, 6]}, {"a": 7, "b": [8, 9]}] - fun = [1, 3, 2] - task = [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] - batches = [0, 0, 0] - time = [0.0, 0.1, 0.2] +# ====================================================================================== +# Test history from data method +# ====================================================================================== + + +@pytest.fixture +def history_data(): + return { + "params": [{"a": 1, "b": [2, 3]}, {"a": 4, "b": [5, 6]}, {"a": 7, "b": [8, 9]}], + "fun": [1, 3, 2], + "task": [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN], + "batches": [0, 0, 0], + "start_time": [0.0, 0.1, 0.2], + } + +def test_history_from_data(history_data): history = History( direction=Direction.MAXIMIZE, - fun=fun, - params=params, - task=task, - batches=batches, - start_time=time, + **history_data, ) assert history.direction == Direction.MAXIMIZE - assert history.params == params - assert history.fun == fun - assert history.task == task - assert history.batches == batches - aaae(history.time, time) + assert history.params == history_data["params"] + assert history.fun == history_data["fun"] + assert history.task == history_data["task"] + assert history.batches == history_data["batches"] + with pytest.warns(FutureWarning): + aaae(history.time, history_data["start_time"]) - assert_array_equal(history.fun_array, np.array(fun, dtype=np.float64)) assert_array_equal(history.monotone_fun, np.array([1, 3, 3], dtype=np.float64)) assert_array_equal( - history.flat_params_array, np.arange(1, 10, dtype=np.float64).reshape(3, 3) + history.flat_params, np.arange(1, 10, dtype=np.float64).reshape(3, 3) ) - aaae(history.time_array, np.array(time)) 
From 1ff4765c41383399be60cfd2b591812b4ea55b72 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sun, 1 Dec 2024 11:41:57 +0100 Subject: [PATCH 11/24] Add stop_time and init arg validation to History --- src/optimagic/optimization/cost_model.py | 55 ++++++++++++++++++++ src/optimagic/optimization/history.py | 33 ++++++++++++ tests/optimagic/optimization/test_history.py | 37 ++++++++++++- 3 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 src/optimagic/optimization/cost_model.py diff --git a/src/optimagic/optimization/cost_model.py b/src/optimagic/optimization/cost_model.py new file mode 100644 index 000000000..ccdfc3b44 --- /dev/null +++ b/src/optimagic/optimization/cost_model.py @@ -0,0 +1,55 @@ +from dataclasses import dataclass +from enum import Enum + + +# Bad name and does not need to be an enum; just representative right now. +class TimeType(Enum): + FUNC_TIME = "FUNC_TIME" + WALL_TIME = "WALL_TIME" + + +@dataclass(frozen=True) +class CostModel: + fun: float | TimeType | str | None = None + jac: float | TimeType | str | None = None + fun_and_jac: float | TimeType | str | None = None + label: str = "" + + def aggregate_batch_time(times: list[float]) -> float: + pass + + +class PerfectParallelizationCostModel(CostModel): + def aggregate_batch_time(times: list[float]) -> float: + return max(times) + + +# ====================================================================================== +# Cost models for serial case +# ====================================================================================== + +FUNCTION_TIME = CostModel( + fun=TimeType.FUNC_TIME, + jac=TimeType.FUNC_TIME, + fun_and_jac=TimeType.FUNC_TIME, + label="Function time (seconds)", +) + +WALL_TIME = CostModel( + fun=TimeType.WALL_TIME, + jac=TimeType.WALL_TIME, + fun_and_jac=TimeType.WALL_TIME, + label="Wall time (seconds)", +) + +N_EVALUATIONS = CostModel( + fun=1, jac=0, fun_and_jac=1, label="Number of criterion evaluations" +) + +# 
====================================================================================== +# Cost models for parallel case +# ====================================================================================== + +N_BATCHES = PerfectParallelizationCostModel( + fun=1, jac=0, fun_and_jac=1, label="Number of batches" +) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index d30c01939..0324c123e 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -18,6 +18,7 @@ class HistoryEntry: params: PyTree fun: float | None time: float + # TODO: add stop time task: EvalTask @@ -29,6 +30,7 @@ def __init__( params: list[PyTree] | None = None, fun: list[float | None] | None = None, start_time: list[float] | None = None, + stop_time: list[float] | None = None, batches: list[int] | None = None, task: list[EvalTask] | None = None, ) -> None: @@ -40,10 +42,15 @@ def __init__( recover a history from a log. """ + _validate_history_args_are_none_or_same_length( + params, fun, start_time, stop_time, batches, task + ) + self.direction = direction self._params = params if params is not None else [] self._fun = fun if fun is not None else [] self._start_time = start_time if start_time is not None else [] + self._stop_time = stop_time if stop_time is not None else [] self._batches = batches if batches is not None else [] self._task = task if task is not None else [] @@ -57,6 +64,7 @@ def add_entry(self, entry: HistoryEntry, batch_id: int | None = None) -> None: self._params.append(entry.params) self._fun.append(entry.fun) self._start_time.append(entry.time) + # TODO: add stop time self._batches.append(batch_id) self._task.append(entry.task) @@ -267,3 +275,28 @@ def _calculate_monotone_sequence( def _get_time(history: History, cost_model: CostModel) -> list[float]: pass + + +# ====================================================================================== +# Misc +# 
====================================================================================== + + +def _validate_history_args_are_none_or_same_length(*args): + """Validate the arguments of the History class initializer, except for `direction`. + + Checks that all arguments are either None or lists of the same length. + + """ + all_none = all(arg is None for arg in args) + all_list = all(isinstance(arg, list) for arg in args) + + if not all_none: + if all_list: + unique_list_lengths = set(map(len, args)) + + if len(unique_list_lengths) != 1: + raise ValueError("All list arguments must have the same length.") + + else: + raise ValueError("All arguments must be lists of the same length or None.") diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index e8a1f7215..505271545 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -3,7 +3,8 @@ from numpy.testing import assert_array_almost_equal as aaae from numpy.testing import assert_array_equal -from optimagic.optimization.history import History, HistoryEntry +from optimagic.optimization.cost_model import CostModel, TimeType +from optimagic.optimization.history import History, HistoryEntry, _get_time from optimagic.typing import Direction, EvalTask # ====================================================================================== @@ -79,7 +80,8 @@ def history_data(): "fun": [1, 3, 2], "task": [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN], "batches": [0, 0, 0], - "start_time": [0.0, 0.1, 0.2], + "start_time": [0.0, 0.15, 0.3], + "stop_time": [0.1, 0.25, 0.4], } @@ -102,3 +104,34 @@ def test_history_from_data(history_data): assert_array_equal( history.flat_params, np.arange(1, 10, dtype=np.float64).reshape(3, 3) ) + + +# ====================================================================================== +# Test _get_time method +# 
====================================================================================== + + +@pytest.fixture +def history(): + data = { + "fun": [10, None, 9, None, 5], + "task": [ + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + ], + "start_time": [0, 2, 5, 7, 10], + "stop_time": [1, 4, 6, 9, 11], + "params": [3, 3, 2, 2, 1], + } + + return History(direction=Direction.MINIMIZE, **data) + + +def test_get_time_only_fun_time(history): + only_fun_time = CostModel(fun=TimeType.FUNC_TIME, label="Function time (seconds)") + got = _get_time(history, cost_model=only_fun_time) + exp = [1, 1, 2, 2, 3] + aaae(got, exp) From 7bbc079af36cb501b901ef857b9721386b3b285c Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sun, 1 Dec 2024 14:32:14 +0100 Subject: [PATCH 12/24] Implement cost model and use in History --- src/optimagic/__init__.py | 3 +- src/optimagic/optimization/cost_model.py | 55 -------------------- src/optimagic/optimization/history.py | 53 +++++++++++++++---- src/optimagic/timing.py | 42 +++++++++++++++ tests/optimagic/optimization/test_history.py | 7 ++- 5 files changed, 90 insertions(+), 70 deletions(-) delete mode 100644 src/optimagic/optimization/cost_model.py create mode 100644 src/optimagic/timing.py diff --git a/src/optimagic/__init__.py b/src/optimagic/__init__.py index 28e912234..04213b265 100644 --- a/src/optimagic/__init__.py +++ b/src/optimagic/__init__.py @@ -1,6 +1,6 @@ from __future__ import annotations -from optimagic import constraints, mark, utilities +from optimagic import constraints, mark, timing, utilities from optimagic.algorithms import algos from optimagic.benchmarking.benchmark_reports import ( convergence_report, @@ -102,4 +102,5 @@ "History", "__version__", "algos", + "timing", ] diff --git a/src/optimagic/optimization/cost_model.py b/src/optimagic/optimization/cost_model.py deleted file mode 100644 index ccdfc3b44..000000000 --- a/src/optimagic/optimization/cost_model.py +++ /dev/null @@ -1,55 +0,0 @@ 
-from dataclasses import dataclass -from enum import Enum - - -# Bad name and does not need to be an enum; just representative right now. -class TimeType(Enum): - FUNC_TIME = "FUNC_TIME" - WALL_TIME = "WALL_TIME" - - -@dataclass(frozen=True) -class CostModel: - fun: float | TimeType | str | None = None - jac: float | TimeType | str | None = None - fun_and_jac: float | TimeType | str | None = None - label: str = "" - - def aggregate_batch_time(times: list[float]) -> float: - pass - - -class PerfectParallelizationCostModel(CostModel): - def aggregate_batch_time(times: list[float]) -> float: - return max(times) - - -# ====================================================================================== -# Cost models for serial case -# ====================================================================================== - -FUNCTION_TIME = CostModel( - fun=TimeType.FUNC_TIME, - jac=TimeType.FUNC_TIME, - fun_and_jac=TimeType.FUNC_TIME, - label="Function time (seconds)", -) - -WALL_TIME = CostModel( - fun=TimeType.WALL_TIME, - jac=TimeType.WALL_TIME, - fun_and_jac=TimeType.WALL_TIME, - label="Wall time (seconds)", -) - -N_EVALUATIONS = CostModel( - fun=1, jac=0, fun_and_jac=1, label="Number of criterion evaluations" -) - -# ====================================================================================== -# Cost models for parallel case -# ====================================================================================== - -N_BATCHES = PerfectParallelizationCostModel( - fun=1, jac=0, fun_and_jac=1, label="Number of batches" -) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 0324c123e..db8c6e8de 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -1,15 +1,15 @@ import warnings from dataclasses import dataclass from functools import partial -from typing import Any +from typing import Any, Literal import numpy as np import pandas as pd from numpy.typing import NDArray 
from pybaum import leaf_names, tree_just_flatten -from optimagic.optimization.cost_model import CostModel from optimagic.parameters.tree_registry import get_registry +from optimagic.timing import CostModel from optimagic.typing import Direction, EvalTask, PyTree @@ -42,7 +42,7 @@ def __init__( recover a history from a log. """ - _validate_history_args_are_none_or_same_length( + _validate_history_args_are_all_none_or_lists_of_same_length( params, fun, start_time, stop_time, batches, task ) @@ -112,6 +112,17 @@ def fun_data(self, cost_model: CostModel, monotone: bool) -> pd.DataFrame: monotone function value. """ + time = self.get_time(cost_model) + if monotone: + fun = self.monotone_fun + else: + fun = self.fun + + task_cat = pd.Categorical( + [t.value for t in self.task], categories=[t.value for t in EvalTask] + ) + + return pd.DataFrame({"fun": fun, "task": task_cat, "time": time}) @property def fun(self) -> list[float | None]: @@ -175,8 +186,34 @@ def flat_param_names(self) -> list[str]: # Time # ---------------------------------------------------------------------------------- - def get_time(self, cost_model: CostModel) -> list[float]: - pass + def get_time( + self, cost_model: CostModel | Literal["wall_time"] + ) -> NDArray[np.float64]: + # TODO: validate that cost_model is either a CostModel or "wall_time" + + if cost_model == "wall_time": + return np.array(self._stop_time, dtype=np.float64) - self._start_time[0] + + fun_time = self._get_time_per_task( + task=EvalTask.FUN, cost_factor=cost_model.fun + ) + jac_time = self._get_time_per_task( + task=EvalTask.JAC, cost_factor=cost_model.jac + ) + fun_and_jac_time = self._get_time_per_task( + task=EvalTask.FUN_AND_JAC, cost_factor=cost_model.fun_and_jac + ) + return fun_time + jac_time + fun_and_jac_time + + def _get_time_per_task( + self, task: EvalTask, cost_factor: float | None + ) -> NDArray[np.float64]: + dummy_task = np.array([1 if t == task else 0 for t in self.task]) + if cost_factor is None: + cost_factor 
= np.array(self._stop_time, dtype=np.float64) - np.array( + self._start_time, dtype=np.float64 + ) + return np.cumsum(cost_factor * dummy_task) # Batches # ---------------------------------------------------------------------------------- @@ -273,16 +310,12 @@ def _calculate_monotone_sequence( return out -def _get_time(history: History, cost_model: CostModel) -> list[float]: - pass - - # ====================================================================================== # Misc # ====================================================================================== -def _validate_history_args_are_none_or_same_length(*args): +def _validate_history_args_are_all_none_or_lists_of_same_length(*args): """Validate the arguments of the History class initializer, except for `direction`. Checks that all arguments are either None or lists of the same length. diff --git a/src/optimagic/timing.py b/src/optimagic/timing.py new file mode 100644 index 000000000..5814363f0 --- /dev/null +++ b/src/optimagic/timing.py @@ -0,0 +1,42 @@ +from dataclasses import dataclass +from typing import Callable + + +@dataclass(frozen=True) +class CostModel: + fun: float | None + jac: float | None + fun_and_jac: float | None + label: str + aggregate_batch_time: Callable[[list[float]], float] + + +evaluation_time = CostModel( + fun=None, + jac=None, + fun_and_jac=None, + label="Function time (seconds)", + aggregate_batch_time=sum, +) + +fun_evaluations = CostModel( + fun=1, + jac=0, + fun_and_jac=1, + label="Number of criterion evaluations", + aggregate_batch_time=sum, +) + +fun_batches = CostModel( + fun=1, jac=0, fun_and_jac=1, label="Number of batches", aggregate_batch_time=max +) + +wall_time = "wall_time" + + +TIMING_REGISTRY = { + "evaluation_time": evaluation_time, + "fun_evaluations": fun_evaluations, + "fun_batches": fun_batches, + "wall_time": wall_time, +} diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index 505271545..dbcce966a 
100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -3,8 +3,7 @@ from numpy.testing import assert_array_almost_equal as aaae from numpy.testing import assert_array_equal -from optimagic.optimization.cost_model import CostModel, TimeType -from optimagic.optimization.history import History, HistoryEntry, _get_time +from optimagic.optimization.history import History, HistoryEntry from optimagic.typing import Direction, EvalTask # ====================================================================================== @@ -125,13 +124,13 @@ def history(): "start_time": [0, 2, 5, 7, 10], "stop_time": [1, 4, 6, 9, 11], "params": [3, 3, 2, 2, 1], + "batches": [0, 1, 2, 3, 4], } return History(direction=Direction.MINIMIZE, **data) def test_get_time_only_fun_time(history): - only_fun_time = CostModel(fun=TimeType.FUNC_TIME, label="Function time (seconds)") - got = _get_time(history, cost_model=only_fun_time) + got = history.get_time(cost_model=only_fun_time) exp = [1, 1, 2, 2, 3] aaae(got, exp) From f15e173a5b8edfa5fafd9d04f1798147a8f26010 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sun, 1 Dec 2024 15:46:54 +0100 Subject: [PATCH 13/24] Align codebase with new History class --- src/optimagic/optimization/history.py | 22 +++++++--- .../internal_optimization_problem.py | 44 ++++++++++++------- src/optimagic/optimization/process_results.py | 3 ++ src/optimagic/visualization/history_plots.py | 21 +++++++-- .../optimagic/optimization/test_algorithm.py | 2 +- .../optimization/test_convergence_report.py | 11 ++++- tests/optimagic/optimization/test_history.py | 42 +++++++++++------- 7 files changed, 103 insertions(+), 42 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index db8c6e8de..ae6c46685 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -17,8 +17,8 @@ class HistoryEntry: params: PyTree fun: float | None - time: 
float - # TODO: add stop time + start_time: float + stop_time: float task: EvalTask @@ -63,8 +63,8 @@ def add_entry(self, entry: HistoryEntry, batch_id: int | None = None) -> None: batch_id = self._get_next_batch_id() self._params.append(entry.params) self._fun.append(entry.fun) - self._start_time.append(entry.time) - # TODO: add stop time + self._start_time.append(entry.start_time) + self._stop_time.append(entry.stop_time) self._batches.append(batch_id) self._task.append(entry.task) @@ -192,7 +192,7 @@ def get_time( # TODO: validate that cost_model is either a CostModel or "wall_time" if cost_model == "wall_time": - return np.array(self._stop_time, dtype=np.float64) - self._start_time[0] + return np.array(self.stop_time, dtype=np.float64) - self.start_time[0] fun_time = self._get_time_per_task( task=EvalTask.FUN, cost_factor=cost_model.fun @@ -210,11 +210,19 @@ def _get_time_per_task( ) -> NDArray[np.float64]: dummy_task = np.array([1 if t == task else 0 for t in self.task]) if cost_factor is None: - cost_factor = np.array(self._stop_time, dtype=np.float64) - np.array( - self._start_time, dtype=np.float64 + cost_factor = np.array(self.stop_time, dtype=np.float64) - np.array( + self.start_time, dtype=np.float64 ) return np.cumsum(cost_factor * dummy_task) + @property + def start_time(self) -> list[float]: + return self._start_time + + @property + def stop_time(self) -> list[float]: + return self._stop_time + # Batches # ---------------------------------------------------------------------------------- diff --git a/src/optimagic/optimization/internal_optimization_problem.py b/src/optimagic/optimization/internal_optimization_problem.py index 2a28943c5..c15e32fe0 100644 --- a/src/optimagic/optimization/internal_optimization_problem.py +++ b/src/optimagic/optimization/internal_optimization_problem.py @@ -306,7 +306,7 @@ def _pure_evaluate_fun( issued. 
""" - now = time.perf_counter() + start_time = time.perf_counter() params = self._converter.params_from_internal(x) traceback: None | str = None try: @@ -333,17 +333,19 @@ def _pure_evaluate_fun( algo_fun_value, hist_fun_value = _process_fun_value( value=fun_value, solver_type=self._solver_type, direction=self._direction ) + stop_time = time.perf_counter() hist_entry = HistoryEntry( params=params, fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.FUN, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, @@ -359,7 +361,7 @@ def _pure_evaluate_jac( if self._jac is None: raise ValueError("The jac function is not defined.") - now = time.perf_counter() + start_time = time.perf_counter() traceback: None | str = None params = self._converter.params_from_internal(x) @@ -389,16 +391,19 @@ def _pure_evaluate_jac( value=jac_value, direction=self._direction, converter=self._converter, x=x ) + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=params, fun=None, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.JAC, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=None, valid=not bool(traceback), raw_fun=None, @@ -415,7 +420,7 @@ def _pure_evaluate_numerical_fun_and_jac( HistoryEntry, IterationState, ]: - now = time.perf_counter() + start_time = time.perf_counter() traceback: None | str = None def func(x: NDArray[np.float64]) -> SpecificFunctionValue: @@ -466,16 +471,19 @@ def func(x: NDArray[np.float64]) -> SpecificFunctionValue: if self._direction == Direction.MAXIMIZE: jac_value = -jac_value + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=self._converter.params_from_internal(x), fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.FUN_AND_JAC, ) log_entry = IterationState( 
params=self._converter.params_from_internal(x), - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, @@ -488,7 +496,7 @@ def func(x: NDArray[np.float64]) -> SpecificFunctionValue: def _pure_exploration_fun( self, x: NDArray[np.float64] ) -> tuple[float, HistoryEntry, IterationState]: - now = time.perf_counter() + start_time = time.perf_counter() params = self._converter.params_from_internal(x) traceback: None | str = None @@ -521,16 +529,19 @@ def _pure_exploration_fun( if self._direction == Direction.MAXIMIZE: hist_fun_value = np.inf + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=params, fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.EXPLORATION, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, @@ -550,7 +561,7 @@ def _pure_evaluate_fun_and_jac( if self._fun_and_jac is None: raise ValueError("The fun_and_jac function is not defined.") - now = time.perf_counter() + start_time = time.perf_counter() traceback: None | str = None params = self._converter.params_from_internal(x) @@ -590,16 +601,19 @@ def _pure_evaluate_fun_and_jac( if self._direction == Direction.MAXIMIZE: out_jac = -out_jac + stop_time = time.perf_counter() + hist_entry = HistoryEntry( params=params, fun=hist_fun_value, - time=now, + start_time=start_time, + stop_time=stop_time, task=EvalTask.FUN_AND_JAC, ) log_entry = IterationState( params=params, - timestamp=now, + timestamp=start_time, scalar_fun=hist_fun_value, valid=not bool(traceback), raw_fun=fun_value, diff --git a/src/optimagic/optimization/process_results.py b/src/optimagic/optimization/process_results.py index a9b1b5946..2c0178535 100644 --- a/src/optimagic/optimization/process_results.py +++ b/src/optimagic/optimization/process_results.py @@ -113,6 +113,9 @@ def process_multistart_result( fun=[opt.fun 
for opt in info.local_optima], params=[opt.params for opt in info.local_optima], start_time=[np.nan for _ in info.local_optima], + stop_time=[np.nan for _ in info.local_optima], + batches=list(range(len(info.local_optima))), + task=len(info.local_optima) * [None], ) conv_report = get_convergence_report(report_history) diff --git a/src/optimagic/visualization/history_plots.py b/src/optimagic/visualization/history_plots.py index 933e1f8d7..73069c522 100644 --- a/src/optimagic/visualization/history_plots.py +++ b/src/optimagic/visualization/history_plots.py @@ -339,11 +339,18 @@ def _extract_plotting_data_from_results_object( if stack_multistart and local_histories is not None: stacked = _get_stacked_local_histories(local_histories, res.direction) if show_exploration: + fun = res.multistart_info.exploration_results.tolist()[::-1] + stacked.fun + params = res.multistart_info.exploration_sample[::-1] + stacked.params + stacked = History( direction=stacked.direction, - fun=res.multistart_info.exploration_results.tolist()[::-1] - + stacked.fun, - params=res.multistart_info.exploration_sample[::-1] + stacked.params, + fun=fun, + params=params, + # TODO: This needs to be fixed + start_time=len(fun) * [None], + stop_time=len(fun) * [None], + batches=len(fun) * [None], + task=len(fun) * [None], ) else: stacked = None @@ -400,6 +407,10 @@ def _extract_plotting_data_from_database(res, stack_multistart, show_exploration fun=_history["fun"], params=_history["params"], start_time=_history["time"], + # TODO: This needs to be updated + stop_time=len(_history["fun"]) * [None], + batches=len(_history["fun"]) * [None], + task=len(_history["fun"]) * [None], ) data = { @@ -438,4 +449,8 @@ def _get_stacked_local_histories(local_histories, direction, history=None): fun=stacked["criterion"], params=stacked["params"], start_time=stacked["runtime"], + # TODO: This needs to be fixed + stop_time=len(stacked["criterion"]) * [None], + task=len(stacked["criterion"]) * [None], + 
batches=len(stacked["criterion"]) * [None], ) diff --git a/tests/optimagic/optimization/test_algorithm.py b/tests/optimagic/optimization/test_algorithm.py index 7d78c02da..96a531ea9 100644 --- a/tests/optimagic/optimization/test_algorithm.py +++ b/tests/optimagic/optimization/test_algorithm.py @@ -117,7 +117,7 @@ def _solve_internal_problem(self, problem, x0): hist_entry = HistoryEntry( params=x0, fun=0.0, - time=0.0, + start_time=0.0, task=EvalTask.FUN, ) problem.history.add_entry(hist_entry) diff --git a/tests/optimagic/optimization/test_convergence_report.py b/tests/optimagic/optimization/test_convergence_report.py index 23bcaba3e..ea527f2bc 100644 --- a/tests/optimagic/optimization/test_convergence_report.py +++ b/tests/optimagic/optimization/test_convergence_report.py @@ -4,7 +4,7 @@ from optimagic.optimization.convergence_report import get_convergence_report from optimagic.optimization.history import History -from optimagic.typing import Direction +from optimagic.typing import Direction, EvalTask def test_get_convergence_report_minimize(): @@ -13,6 +13,9 @@ def test_get_convergence_report_minimize(): params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], fun=[5, 4.1, 4.4, 4.0], start_time=[0, 1, 2, 3], + stop_time=[1, 2, 3, 4], + task=4 * [EvalTask.FUN], + batches=[0, 1, 2, 3], ) calculated = pd.DataFrame.from_dict(get_convergence_report(hist)) @@ -27,6 +30,9 @@ def test_get_convergence_report_maximize(): params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], fun=[-5, -4.1, -4.4, -4.0], start_time=[0, 1, 2, 3], + stop_time=[1, 2, 3, 4], + task=4 * [EvalTask.FUN], + batches=[0, 1, 2, 3], ) calculated = pd.DataFrame.from_dict(get_convergence_report(hist)) @@ -42,6 +48,9 @@ def test_history_is_too_short(): params=[{"a": 0}, {"a": 2.1}, {"a": 2.5}, {"a": 2.0}], fun=[5, 4.1, 4.4, 4.0], start_time=[0, 1, 2, 3], + stop_time=[1, 2, 3, 4], + task=4 * [EvalTask.FUN], + batches=[0, 1, 2, 3], ) calculated = get_convergence_report(hist) diff --git 
a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index dbcce966a..5115e402f 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -14,9 +14,27 @@ @pytest.fixture def history_entries(): return [ - HistoryEntry(params={"a": 1, "b": [2, 3]}, fun=1, time=0.1, task=EvalTask.FUN), - HistoryEntry(params={"a": 4, "b": [5, 6]}, fun=3, time=0.2, task=EvalTask.FUN), - HistoryEntry(params={"a": 7, "b": [8, 9]}, fun=2, time=0.3, task=EvalTask.FUN), + HistoryEntry( + params={"a": 1, "b": [2, 3]}, + fun=1, + start_time=0.1, + stop_time=0.2, + task=EvalTask.FUN, + ), + HistoryEntry( + params={"a": 4, "b": [5, 6]}, + fun=3, + start_time=0.2, + stop_time=0.3, + task=EvalTask.FUN, + ), + HistoryEntry( + params={"a": 7, "b": [8, 9]}, + fun=2, + start_time=0.3, + stop_time=0.4, + task=EvalTask.FUN, + ), ] @@ -35,8 +53,8 @@ def test_history_add_entry(history_entries): assert history.fun == [1, 3, 2] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 1, 2] - with pytest.warns(FutureWarning): - aaae(history.time, [0.0, 0.1, 0.2]) + aaae(history.start_time, [0.1, 0.2, 0.3]) + aaae(history.stop_time, [0.2, 0.3, 0.4]) assert_array_equal(history.monotone_fun, np.array([1, 1, 1], dtype=np.float64)) assert_array_equal( @@ -58,8 +76,8 @@ def test_history_add_batch(history_entries): assert history.fun == [1, 3, 2] assert history.task == [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN] assert history.batches == [0, 0, 0] - with pytest.warns(FutureWarning): - aaae(history.time, [0.0, 0.1, 0.2]) + aaae(history.start_time, [0.1, 0.2, 0.3]) + aaae(history.stop_time, [0.2, 0.3, 0.4]) assert_array_equal(history.monotone_fun, np.array([1, 3, 3], dtype=np.float64)) assert_array_equal( @@ -96,8 +114,8 @@ def test_history_from_data(history_data): assert history.fun == history_data["fun"] assert history.task == history_data["task"] assert history.batches == 
history_data["batches"] - with pytest.warns(FutureWarning): - aaae(history.time, history_data["start_time"]) + aaae(history.start_time, history_data["start_time"]) + aaae(history.stop_time, history_data["stop_time"]) assert_array_equal(history.monotone_fun, np.array([1, 3, 3], dtype=np.float64)) assert_array_equal( @@ -128,9 +146,3 @@ def history(): } return History(direction=Direction.MINIMIZE, **data) - - -def test_get_time_only_fun_time(history): - got = history.get_time(cost_model=only_fun_time) - exp = [1, 1, 2, 2, 3] - aaae(got, exp) From eac0bac3b3695dbe0ba5aff1523af4ad372c9680 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sun, 1 Dec 2024 16:11:03 +0100 Subject: [PATCH 14/24] Add unit tests --- src/optimagic/optimization/history.py | 43 +++++------ tests/optimagic/optimization/test_history.py | 78 +++++++++++++++++++- 2 files changed, 98 insertions(+), 23 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index ae6c46685..aaad9bc82 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -42,7 +42,7 @@ def __init__( recover a history from a log. """ - _validate_history_args_are_all_none_or_lists_of_same_length( + _validate_args_are_all_none_or_lists_of_same_length( params, fun, start_time, stop_time, batches, task ) @@ -112,17 +112,13 @@ def fun_data(self, cost_model: CostModel, monotone: bool) -> pd.DataFrame: monotone function value. 
""" - time = self.get_time(cost_model) if monotone: fun = self.monotone_fun else: fun = self.fun - - task_cat = pd.Categorical( - [t.value for t in self.task], categories=[t.value for t in EvalTask] - ) - - return pd.DataFrame({"fun": fun, "task": task_cat, "time": time}) + task = _task_as_categorical(self.task) + time = self.get_time(cost_model) + return pd.DataFrame({"fun": fun, "task": task, "time": time}) @property def fun(self) -> list[float | None]: @@ -170,6 +166,10 @@ def params_data(self, cost_model: CostModel) -> pd.DataFrame: names), 'task', 'time' and 'value' (the parameter values). """ + data = pd.DataFrame(self.flat_params, columns=self.flat_param_names) + data["task"] = _task_as_categorical(self.task) + data["time"] = self.get_time(cost_model) + return data @property def params(self) -> list[PyTree]: @@ -181,7 +181,7 @@ def flat_params(self) -> list[list[float]]: @property def flat_param_names(self) -> list[str]: - return _get_flat_param_names(self._params) + return _get_flat_param_names(param=self._params[0]) # Time # ---------------------------------------------------------------------------------- @@ -278,7 +278,7 @@ def __getitem__(self, key: str) -> Any: def _get_flat_params(params: list[PyTree]) -> list[list[float]]: - if len(params) > 0 and _is_1d_numpy_array(params[0]): + if len(params) > 0 and _is_1d_array(params[0]): # fast path flatten = lambda x: x.tolist() else: @@ -288,16 +288,16 @@ def _get_flat_params(params: list[PyTree]) -> list[list[float]]: return [flatten(p) for p in params] -def _get_flat_param_names(params: list[PyTree]) -> list[str]: - if _is_1d_numpy_array(params[0]): +def _get_flat_param_names(param: PyTree) -> list[str]: + if _is_1d_array(param): # fast path - return np.arange(params[0].size).astype(str).tolist() + return np.arange(param.size).astype(str).tolist() registry = get_registry(extended=True) - return leaf_names(params[0], registry=registry) + return leaf_names(param, registry=registry) -def 
_is_1d_numpy_array(param: PyTree) -> bool: +def _is_1d_array(param: PyTree) -> bool: return isinstance(param, np.ndarray) and param.ndim == 1 @@ -323,12 +323,7 @@ def _calculate_monotone_sequence( # ====================================================================================== -def _validate_history_args_are_all_none_or_lists_of_same_length(*args): - """Validate the arguments of the History class initializer, except for `direction`. - - Checks that all arguments are either None or lists of the same length. - - """ +def _validate_args_are_all_none_or_lists_of_same_length(*args): all_none = all(arg is None for arg in args) all_list = all(isinstance(arg, list) for arg in args) @@ -341,3 +336,9 @@ def _validate_history_args_are_all_none_or_lists_of_same_length(*args): else: raise ValueError("All arguments must be lists of the same length or None.") + + +def _task_as_categorical(task: list[EvalTask]) -> pd.Series: + return pd.Categorical( + [t.value for t in task], categories=[t.value for t in EvalTask] + ) diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index 5115e402f..dd2931e42 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -1,9 +1,19 @@ import numpy as np +import pandas as pd import pytest from numpy.testing import assert_array_almost_equal as aaae from numpy.testing import assert_array_equal -from optimagic.optimization.history import History, HistoryEntry +from optimagic.optimization.history import ( + History, + HistoryEntry, + _calculate_monotone_sequence, + _get_flat_param_names, + _get_flat_params, + _is_1d_array, + _task_as_categorical, + _validate_args_are_all_none_or_lists_of_same_length, +) from optimagic.typing import Direction, EvalTask # ====================================================================================== @@ -124,10 +134,74 @@ def test_history_from_data(history_data): # 
====================================================================================== -# Test _get_time method +# Unit tests # ====================================================================================== +def test_is_1d_array(): + assert _is_1d_array(np.arange(2)) is True + assert _is_1d_array(np.eye(2)) is False + assert _is_1d_array([0, 1]) is False + + +def test_get_flat_params_pytree(): + params = [ + {"a": 1, "b": [0, 1], "c": np.arange(2)}, + {"a": 2, "b": [1, 2], "c": np.arange(2)}, + ] + got = _get_flat_params(params) + exp = [ + [1, 0, 1, 0, 1], + [2, 1, 2, 0, 1], + ] + assert_array_equal(got, exp) + + +def test_get_flat_params_fast_path(): + params = [np.arange(2)] + got = _get_flat_params(params) + exp = [[0, 1]] + assert_array_equal(got, exp) + + +def test_get_flat_param_names(): + got = _get_flat_param_names(param={"a": 0, "b": [0, 1], "c": np.arange(2)}) + exp = ["a", "b_0", "b_1", "c_0", "c_1"] + assert got == exp + + +def test_calculate_monotone_sequence_maximize(): + sequence = [0, 1, 0, 0, 2, 10, 0] + exp = [0, 1, 1, 1, 2, 10, 10] + got = _calculate_monotone_sequence(sequence, direction=Direction.MAXIMIZE) + assert_array_equal(exp, got) + + +def test_calculate_monotone_sequence_minimize(): + sequence = [10, 11, 8, 12, 0, 5] + exp = [10, 10, 8, 8, 0, 0] + got = _calculate_monotone_sequence(sequence, direction=Direction.MINIMIZE) + assert_array_equal(exp, got) + + +def test_validate_args_are_all_none_or_lists_of_same_length(): + _validate_args_are_all_none_or_lists_of_same_length(None, None) + _validate_args_are_all_none_or_lists_of_same_length([1], [1]) + + with pytest.raises(ValueError, match="All list arguments must have the same"): + _validate_args_are_all_none_or_lists_of_same_length([1], [1, 2]) + + with pytest.raises(ValueError, match="All arguments must be lists of the same"): + _validate_args_are_all_none_or_lists_of_same_length(None, [1]) + + +def test_task_as_categorical(): + task = [EvalTask.FUN, EvalTask.JAC, 
EvalTask.FUN_AND_JAC] + got = _task_as_categorical(task) + assert got.tolist() == ["fun", "jac", "fun_and_jac"] + assert isinstance(got.dtype, pd.CategoricalDtype) + + @pytest.fixture def history(): data = { From dff7c4c468be09606b06d2caa905569f0ea05381 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sun, 1 Dec 2024 20:13:39 +0100 Subject: [PATCH 15/24] Add tests for fun and params --- src/optimagic/optimization/history.py | 17 +- tests/optimagic/optimization/test_history.py | 245 ++++++++++++++++--- 2 files changed, 217 insertions(+), 45 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index aaad9bc82..2a26d317e 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -96,7 +96,7 @@ def _get_next_batch_id(self) -> int: # Properties to access the history # ================================================================================== - # Function value and monotone function value + # Function data, function value, and monotone function value # ---------------------------------------------------------------------------------- def fun_data(self, cost_model: CostModel, monotone: bool) -> pd.DataFrame: @@ -152,7 +152,7 @@ def is_accepted(self) -> NDArray[np.bool_]: elif self.direction == Direction.MAXIMIZE: return fun_arr >= self.monotone_fun - # Parameters + # Parameter data, params, flat params, and flat params names # ---------------------------------------------------------------------------------- def params_data(self, cost_model: CostModel) -> pd.DataFrame: @@ -163,13 +163,16 @@ def params_data(self, cost_model: CostModel) -> pd.DataFrame: Returns: pd.DataFrame: The parameter data. The columns are: 'name' (the parameter - names), 'task', 'time' and 'value' (the parameter values). + names), 'value' (the parameter values), 'task', and 'time'. 
""" - data = pd.DataFrame(self.flat_params, columns=self.flat_param_names) - data["task"] = _task_as_categorical(self.task) - data["time"] = self.get_time(cost_model) - return data + wide = pd.DataFrame(self.flat_params, columns=self.flat_param_names) + wide["task"] = _task_as_categorical(self.task) + wide["time"] = self.get_time(cost_model) + data = pd.melt( + wide, var_name="name", value_name="value", id_vars=["task", "time"] + ) + return data.reindex(columns=["name", "value", "task", "time"]) @property def params(self) -> list[PyTree]: diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index dd2931e42..7d3a604be 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -3,7 +3,10 @@ import pytest from numpy.testing import assert_array_almost_equal as aaae from numpy.testing import assert_array_equal +from pandas.testing import assert_frame_equal +from pybaum import tree_map +import optimagic as om from optimagic.optimization.history import ( History, HistoryEntry, @@ -17,7 +20,7 @@ from optimagic.typing import Direction, EvalTask # ====================================================================================== -# Test histories add entries and batches methods +# Test methods to add data to History (add_entry, add_batch, init) # ====================================================================================== @@ -95,14 +98,8 @@ def test_history_add_batch(history_entries): ) -# ====================================================================================== -# Test history from data method -# ====================================================================================== - - -@pytest.fixture -def history_data(): - return { +def test_history_from_data(): + data = { "params": [{"a": 1, "b": [2, 3]}, {"a": 4, "b": [5, 6]}, {"a": 7, "b": [8, 9]}], "fun": [1, 3, 2], "task": [EvalTask.FUN, EvalTask.FUN, EvalTask.FUN], @@ -111,21 +108,19 @@ 
def history_data(): "stop_time": [0.1, 0.25, 0.4], } - -def test_history_from_data(history_data): history = History( direction=Direction.MAXIMIZE, - **history_data, + **data, ) assert history.direction == Direction.MAXIMIZE - assert history.params == history_data["params"] - assert history.fun == history_data["fun"] - assert history.task == history_data["task"] - assert history.batches == history_data["batches"] - aaae(history.start_time, history_data["start_time"]) - aaae(history.stop_time, history_data["stop_time"]) + assert history.params == data["params"] + assert history.fun == data["fun"] + assert history.task == data["task"] + assert history.batches == data["batches"] + aaae(history.start_time, data["start_time"]) + aaae(history.stop_time, data["stop_time"]) assert_array_equal(history.monotone_fun, np.array([1, 3, 3], dtype=np.float64)) assert_array_equal( @@ -133,6 +128,200 @@ def test_history_from_data(history_data): ) +# ====================================================================================== +# Test functionality of History +# ====================================================================================== + + +@pytest.fixture +def params(): + params_tree = {"a": None, "b": {"c": None, "d": (None, None)}} + return [ + tree_map(lambda _: k, params_tree, is_leaf=lambda l: l is None) # noqa: B023 + for k in range(6) + ] + + +@pytest.fixture +def history(params): + data = { + "fun": [10, None, 9, None, 2, 5], + "task": [ + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.FUN_AND_JAC, + ], + "start_time": [0, 2, 5, 7, 10, 12], + "stop_time": [1, 4, 6, 9, 11, 14], + "params": params, + "batches": [0, 0, 1, 1, 2, 2], + } + + return History(direction=Direction.MINIMIZE, **data) + + +# Function data, function value, and monotone function value +# -------------------------------------------------------------------------------------- + + +def test_history_fun_data_with_fun_evaluations_cost_model(history): 
+ got = history.fun_data( + cost_model=om.timing.fun_evaluations, + monotone=False, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 5], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 1, 2, 2, 3, 4], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_history_fun_data_with_fun_evaluations_cost_model_and_monotone(history): + got = history.fun_data( + cost_model=om.timing.fun_evaluations, + monotone=True, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 2], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 1, 2, 2, 3, 4], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +@pytest.mark.xfail(reason="Must be fixed!") +def test_history_fun_data_with_fun_batches_cost_model(history): + got = history.fun_data( + cost_model=om.timing.fun_batches, + monotone=False, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 5], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 1, 2, 2, 3, 3], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_history_fun_data_with_evaluation_time_cost_model(history): + got = history.fun_data( + cost_model=om.timing.evaluation_time, + monotone=False, + ) + exp = pd.DataFrame( + { + "fun": [10, np.nan, 9, np.nan, 2, 5], + "task": [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + "time": [1, 3, 4, 6, 7, 9], + } + ) + assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) + + +def test_fun_property(history): + assert_array_equal(history.fun, [10, None, 9, None, 2, 5]) + + +def test_monotone_fun_property(history): + assert_array_equal(history.monotone_fun, np.array([10, np.nan, 9, np.nan, 2, 2])) + + +# Acceptance +# -------------------------------------------------------------------------------------- + + +def 
test_is_accepted_property(history): + got = history.is_accepted + exp = np.array([True, False, True, False, True, False]) + assert_array_equal(got, exp) + + +# Parameter data, params, flat params, and flat params names +# -------------------------------------------------------------------------------------- + + +def test_params_data_fun_evaluations_cost_model(history): + got = history.params_data(cost_model=om.timing.fun_evaluations) + exp = pd.DataFrame( + { + "name": np.repeat( + [ + "a", + "b_c", + "b_d_0", + "b_d_1", + ], + 6, + ), + "value": np.tile(list(range(6)), 4), + "task": np.tile( + [ + "fun", + "jac", + "fun", + "jac", + "fun", + "fun_and_jac", + ], + 4, + ), + "time": np.tile([1, 1, 2, 2, 3, 4], 4), + } + ) + assert_frame_equal(got, exp, check_categorical=False, check_dtype=False) + + +def test_params_property(history, params): + assert history.params == params + + +def test_flat_params_property(history): + got = history.flat_params + assert_array_equal(got, [[k for _ in range(4)] for k in range(6)]) + + +def test_flat_param_names(history): + assert history.flat_param_names == ["a", "b_c", "b_d_0", "b_d_1"] + + # ====================================================================================== # Unit tests # ====================================================================================== @@ -200,23 +389,3 @@ def test_task_as_categorical(): got = _task_as_categorical(task) assert got.tolist() == ["fun", "jac", "fun_and_jac"] assert isinstance(got.dtype, pd.CategoricalDtype) - - -@pytest.fixture -def history(): - data = { - "fun": [10, None, 9, None, 5], - "task": [ - EvalTask.FUN, - EvalTask.JAC, - EvalTask.FUN, - EvalTask.JAC, - EvalTask.FUN, - ], - "start_time": [0, 2, 5, 7, 10], - "stop_time": [1, 4, 6, 9, 11], - "params": [3, 3, 2, 2, 1], - "batches": [0, 1, 2, 3, 4], - } - - return History(direction=Direction.MINIMIZE, **data) From df8bb959dc1d547308a1298f178f1ce8346ad886 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sun, 1 Dec 
2024 20:32:30 +0100 Subject: [PATCH 16/24] Finish test suite for History class --- src/optimagic/optimization/history.py | 10 +-- tests/optimagic/optimization/test_history.py | 79 +++++++++++++++++++- 2 files changed, 83 insertions(+), 6 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 2a26d317e..fb2bd0c23 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -117,7 +117,7 @@ def fun_data(self, cost_model: CostModel, monotone: bool) -> pd.DataFrame: else: fun = self.fun task = _task_as_categorical(self.task) - time = self.get_time(cost_model) + time = self._get_time(cost_model) return pd.DataFrame({"fun": fun, "task": task, "time": time}) @property @@ -168,7 +168,7 @@ def params_data(self, cost_model: CostModel) -> pd.DataFrame: """ wide = pd.DataFrame(self.flat_params, columns=self.flat_param_names) wide["task"] = _task_as_categorical(self.task) - wide["time"] = self.get_time(cost_model) + wide["time"] = self._get_time(cost_model) data = pd.melt( wide, var_name="name", value_name="value", id_vars=["task", "time"] ) @@ -189,7 +189,7 @@ def flat_param_names(self) -> list[str]: # Time # ---------------------------------------------------------------------------------- - def get_time( + def _get_time( self, cost_model: CostModel | Literal["wall_time"] ) -> NDArray[np.float64]: # TODO: validate that cost_model is either a CostModel or "wall_time" @@ -248,7 +248,7 @@ def task(self) -> list[EvalTask]: def time(self) -> list[float]: msg = ( "The attribute `time` of History will be deprecated soon. Use the " - f"`{self.get_time.__name__}` method instead." + f"`{self._get_time.__name__}` method instead." ) warnings.warn(msg, FutureWarning) arr = np.array(self._start_time) @@ -264,7 +264,7 @@ def criterion(self) -> list[float | None]: def runtime(self) -> list[float]: msg = ( "The attribute `runtime` of History will be deprecated soon. 
Use the " - f"`{self.get_time.__name__}` method instead." + f"`{self._get_time.__name__}` method instead." ) warnings.warn(msg, FutureWarning) return self.time diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index 7d3a604be..58b25b4ca 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -137,7 +137,7 @@ def test_history_from_data(): def params(): params_tree = {"a": None, "b": {"c": None, "d": (None, None)}} return [ - tree_map(lambda _: k, params_tree, is_leaf=lambda l: l is None) # noqa: B023 + tree_map(lambda _: k, params_tree, is_leaf=lambda leaf: leaf is None) # noqa: B023 for k in range(6) ] @@ -322,6 +322,83 @@ def test_flat_param_names(history): assert history.flat_param_names == ["a", "b_c", "b_d_0", "b_d_1"] +# Time +# -------------------------------------------------------------------------------------- + + +def test_get_time_per_task_fun(history): + got = history._get_time_per_task(EvalTask.FUN, cost_factor=1) + exp = np.array([1, 1, 2, 2, 3, 3]) + assert_array_equal(got, exp) + + +def test_get_time_per_task_jac(history): + got = history._get_time_per_task(EvalTask.JAC, cost_factor=1) + exp = np.array([0, 1, 1, 2, 2, 2]) + assert_array_equal(got, exp) + + +def test_get_time_per_task_fun_and_jac(history): + got = history._get_time_per_task(EvalTask.FUN_AND_JAC, cost_factor=1) + exp = np.array([0, 0, 0, 0, 0, 1]) + assert_array_equal(got, exp) + + +def test_get_time_cost_model(history): + cost_model = om.timing.CostModel( + fun=0.5, jac=1, fun_and_jac=2, label="test", aggregate_batch_time=sum + ) + got = history._get_time(cost_model) + exp = np.array( + [ + 0.5, + 0.5 + 1, + 1 + 1, + 1 + 2, + 1.5 + 2, + 1.5 + 2 + 2, + ] + ) + assert_array_equal(got, exp) + + +def test_get_time_wall_time(history): + got = history._get_time(cost_model="wall_time") + exp = np.array([1, 4, 6, 9, 11, 14]) + assert_array_equal(got, exp) + + +def 
test_start_time_property(history): + assert history.start_time == [0, 2, 5, 7, 10, 12] + + +def test_stop_time_property(history): + assert history.stop_time == [1, 4, 6, 9, 11, 14] + + +# Batches +# -------------------------------------------------------------------------------------- + + +def test_batches_property(history): + assert history.batches == [0, 0, 1, 1, 2, 2] + + +# Tasks +# -------------------------------------------------------------------------------------- + + +def test_task_property(history): + assert history.task == [ + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.JAC, + EvalTask.FUN, + EvalTask.FUN_AND_JAC, + ] + + # ====================================================================================== # Unit tests # ====================================================================================== From e72608b1b4a9528cb034d7f87d5b1fe68c70dfe9 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sun, 1 Dec 2024 20:45:21 +0100 Subject: [PATCH 17/24] Fix mypy errors --- src/optimagic/optimization/history.py | 21 ++++++++++++------- src/optimagic/optimization/process_results.py | 4 ++-- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index fb2bd0c23..40083eb7d 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -113,7 +113,7 @@ def fun_data(self, cost_model: CostModel, monotone: bool) -> pd.DataFrame: """ if monotone: - fun = self.monotone_fun + fun: list[float | None] | NDArray[np.float64] = self.monotone_fun else: fun = self.fun task = _task_as_categorical(self.task) @@ -213,10 +213,13 @@ def _get_time_per_task( ) -> NDArray[np.float64]: dummy_task = np.array([1 if t == task else 0 for t in self.task]) if cost_factor is None: - cost_factor = np.array(self.stop_time, dtype=np.float64) - np.array( - self.start_time, dtype=np.float64 - ) - return np.cumsum(cost_factor * dummy_task) + factor: float | 
NDArray[np.float64] = np.array( + self.stop_time, dtype=np.float64 + ) - np.array(self.start_time, dtype=np.float64) + else: + factor = cost_factor + + return np.cumsum(factor * dummy_task) @property def start_time(self) -> list[float]: @@ -326,13 +329,15 @@ def _calculate_monotone_sequence( # ====================================================================================== -def _validate_args_are_all_none_or_lists_of_same_length(*args): +def _validate_args_are_all_none_or_lists_of_same_length( + *args: list[Any] | None, +) -> None: all_none = all(arg is None for arg in args) all_list = all(isinstance(arg, list) for arg in args) if not all_none: if all_list: - unique_list_lengths = set(map(len, args)) + unique_list_lengths = set(map(len, args)) # type: ignore[arg-type] if len(unique_list_lengths) != 1: raise ValueError("All list arguments must have the same length.") @@ -341,7 +346,7 @@ def _validate_args_are_all_none_or_lists_of_same_length(*args): raise ValueError("All arguments must be lists of the same length or None.") -def _task_as_categorical(task: list[EvalTask]) -> pd.Series: +def _task_as_categorical(task: list[EvalTask]) -> pd.Categorical: return pd.Categorical( [t.value for t in task], categories=[t.value for t in EvalTask] ) diff --git a/src/optimagic/optimization/process_results.py b/src/optimagic/optimization/process_results.py index 2c0178535..8da70059c 100644 --- a/src/optimagic/optimization/process_results.py +++ b/src/optimagic/optimization/process_results.py @@ -8,7 +8,7 @@ from optimagic.optimization.history import History from optimagic.optimization.optimize_result import MultistartInfo, OptimizeResult from optimagic.parameters.conversion import Converter -from optimagic.typing import AggregationLevel, Direction, PyTree +from optimagic.typing import AggregationLevel, Direction, EvalTask, PyTree from optimagic.utilities import isscalar @@ -115,7 +115,7 @@ def process_multistart_result( start_time=[np.nan for _ in info.local_optima], 
stop_time=[np.nan for _ in info.local_optima], batches=list(range(len(info.local_optima))), - task=len(info.local_optima) * [None], + task=len(info.local_optima) * [EvalTask.FUN], ) conv_report = get_convergence_report(report_history) From fe27a3bcc77cd96eb93e6b85897c1a3e27ca5999 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Sun, 1 Dec 2024 20:52:22 +0100 Subject: [PATCH 18/24] Try to fix some warnings --- src/optimagic/benchmarking/run_benchmark.py | 2 +- src/optimagic/optimization/history.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/optimagic/benchmarking/run_benchmark.py b/src/optimagic/benchmarking/run_benchmark.py index 9e2ce4cd8..cd6d844c4 100644 --- a/src/optimagic/benchmarking/run_benchmark.py +++ b/src/optimagic/benchmarking/run_benchmark.py @@ -209,7 +209,7 @@ def _process_one_result(optimize_result, problem): criterion_history = history.fun criterion_history = np.clip(criterion_history, _solution_crit, np.inf) batches_history = history.batches - time_history = history.time + time_history = history.start_time return { "params_history": params_history_flat, diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 40083eb7d..0e4bb5efb 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -251,7 +251,7 @@ def task(self) -> list[EvalTask]: def time(self) -> list[float]: msg = ( "The attribute `time` of History will be deprecated soon. Use the " - f"`{self._get_time.__name__}` method instead." + "`start_time` method instead." ) warnings.warn(msg, FutureWarning) arr = np.array(self._start_time) @@ -267,7 +267,7 @@ def criterion(self) -> list[float | None]: def runtime(self) -> list[float]: msg = ( "The attribute `runtime` of History will be deprecated soon. Use the " - f"`{self._get_time.__name__}` method instead." + "`start_time` method instead." 
) warnings.warn(msg, FutureWarning) return self.time From c86421cecbe4b974a593ea0625ce094eb98ad528 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Tue, 3 Dec 2024 12:57:47 +0100 Subject: [PATCH 19/24] Validate cost_model argument in _get_time function --- src/optimagic/optimization/history.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 0e4bb5efb..aa3af7709 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -192,7 +192,8 @@ def flat_param_names(self) -> list[str]: def _get_time( self, cost_model: CostModel | Literal["wall_time"] ) -> NDArray[np.float64]: - # TODO: validate that cost_model is either a CostModel or "wall_time" + if not isinstance(cost_model, CostModel) and cost_model != "wall_time": + raise ValueError("cost_model must be a CostModel or 'wall_time'.") if cost_model == "wall_time": return np.array(self.stop_time, dtype=np.float64) - self.start_time[0] From da279acc76c38bbf2cc61b328506fe7a16c62b05 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Tue, 3 Dec 2024 17:53:46 +0100 Subject: [PATCH 20/24] Handle batch case --- src/optimagic/optimization/history.py | 91 +++++++++++++++++++- src/optimagic/timing.py | 4 +- tests/optimagic/optimization/test_history.py | 85 +++++++++++++++--- 3 files changed, 161 insertions(+), 19 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index aa3af7709..dd9bad85b 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -1,7 +1,7 @@ import warnings from dataclasses import dataclass from functools import partial -from typing import Any, Literal +from typing import Any, Callable, Iterable, Literal import numpy as np import pandas as pd @@ -192,6 +192,16 @@ def flat_param_names(self) -> list[str]: def _get_time( self, cost_model: CostModel | Literal["wall_time"] ) -> 
NDArray[np.float64]: + """Return the cumulative time measure. + + Args: + cost_model: The cost model that is used to calculate the time measure. If + "wall_time", the wall time is returned. + + Returns: + np.ndarray: The time measure. + + """ if not isinstance(cost_model, CostModel) and cost_model != "wall_time": raise ValueError("cost_model must be a CostModel or 'wall_time'.") @@ -207,11 +217,31 @@ def _get_time( fun_and_jac_time = self._get_time_per_task( task=EvalTask.FUN_AND_JAC, cost_factor=cost_model.fun_and_jac ) - return fun_time + jac_time + fun_and_jac_time + + time = fun_time + jac_time + fun_and_jac_time + batch_time = _batch_apply( + data=time, + batch_ids=self.batches, + func=cost_model.aggregate_batch_time, + ) + return np.cumsum(batch_time) def _get_time_per_task( self, task: EvalTask, cost_factor: float | None ) -> NDArray[np.float64]: + """Return the time measure per task. + + Args: + task: The task for which the time is calculated. + cost_factor: The cost factor used to calculate the time. If None, the time + is the difference between the start and stop time, otherwise the time + is given by the cost factor. + + Returns: + np.ndarray: The time per task. For entries where the task is not the + requested task, the time is 0. + + """ dummy_task = np.array([1 if t == task else 0 for t in self.task]) if cost_factor is None: factor: float | NDArray[np.float64] = np.array( @@ -220,7 +250,7 @@ def _get_time_per_task( else: factor = cost_factor - return np.cumsum(factor * dummy_task) + return factor * dummy_task @property def start_time(self) -> list[float]: @@ -351,3 +381,58 @@ def _task_as_categorical(task: list[EvalTask]) -> pd.Categorical: return pd.Categorical( [t.value for t in task], categories=[t.value for t in EvalTask] ) + + +def _batch_apply( + data: NDArray[np.float64], + batch_ids: list[int], + func: Callable[[Iterable[float]], float], +) -> NDArray[np.float64]: + """Apply a reduction operator on batches of data. 
+ + Args: + data: 1d array with data. + batch_ids: A list whose length is equal to the size of data. Values need to be + sorted and can be repeated. + func: A reduction function that takes an iterable of floats as input (e.g., a + numpy array or a list) and returns a scalar. + + Returns: + The transformed data. Has the same length as data. For each batch, the result of + the reduction operation is stored at the first index of that batch, and all + other values of that batch are set to zero. + + """ + batch_start = _get_batch_start(batch_ids) + batch_stop = [*batch_start, len(data)][1:] + + batch_result = [] + for batch, (start, stop) in zip( + batch_ids, zip(batch_start, batch_stop, strict=False), strict=False + ): + try: + batch_data = data[start:stop] + reduced = func(batch_data) + batch_result.append(reduced) + except Exception as e: + msg = ( + f"Calling function {func.__name__} on batch {batch} of the History " + f"History raised an Exception. Please verify that {func.__name__} is " + "properly defined." + ) + raise ValueError(msg) from e + + out = np.zeros_like(data) + out[batch_start] = batch_result + return out + + +def _get_batch_start(batch_ids: list[int]) -> list[int]: + """Get start indices of batch. + + This function assumes that batch_ids non-empty and sorted. 
+ + """ + ids_arr = np.array(batch_ids, dtype=np.int64) + indices = np.where(ids_arr[:-1] != ids_arr[1:])[0] + 1 + return np.insert(indices, 0, 0).tolist() diff --git a/src/optimagic/timing.py b/src/optimagic/timing.py index 5814363f0..db83a76d2 100644 --- a/src/optimagic/timing.py +++ b/src/optimagic/timing.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Callable +from typing import Callable, Iterable @dataclass(frozen=True) @@ -8,7 +8,7 @@ class CostModel: jac: float | None fun_and_jac: float | None label: str - aggregate_batch_time: Callable[[list[float]], float] + aggregate_batch_time: Callable[[Iterable[float]], float] evaluation_time = CostModel( diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index 58b25b4ca..ab92b88a2 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -10,7 +10,9 @@ from optimagic.optimization.history import ( History, HistoryEntry, + _batch_apply, _calculate_monotone_sequence, + _get_batch_start, _get_flat_param_names, _get_flat_params, _is_1d_array, @@ -143,8 +145,8 @@ def params(): @pytest.fixture -def history(params): - data = { +def history_data(params): + return { "fun": [10, None, 9, None, 2, 5], "task": [ EvalTask.FUN, @@ -157,9 +159,19 @@ def history(params): "start_time": [0, 2, 5, 7, 10, 12], "stop_time": [1, 4, 6, 9, 11, 14], "params": params, - "batches": [0, 0, 1, 1, 2, 2], + "batches": [0, 1, 2, 3, 4, 5], } + +@pytest.fixture +def history(history_data): + return History(direction=Direction.MINIMIZE, **history_data) + + +@pytest.fixture +def history_with_batch_data(history_data): + data = history_data.copy() + data["batches"] = [0, 0, 1, 1, 2, 2] return History(direction=Direction.MINIMIZE, **data) @@ -211,9 +223,8 @@ def test_history_fun_data_with_fun_evaluations_cost_model_and_monotone(history): assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) 
-@pytest.mark.xfail(reason="Must be fixed!") -def test_history_fun_data_with_fun_batches_cost_model(history): - got = history.fun_data( +def test_history_fun_data_with_fun_batches_cost_model(history_with_batch_data): + got = history_with_batch_data.fun_data( cost_model=om.timing.fun_batches, monotone=False, ) @@ -328,23 +339,23 @@ def test_flat_param_names(history): def test_get_time_per_task_fun(history): got = history._get_time_per_task(EvalTask.FUN, cost_factor=1) - exp = np.array([1, 1, 2, 2, 3, 3]) + exp = np.array([1, 0, 1, 0, 1, 0]) assert_array_equal(got, exp) -def test_get_time_per_task_jac(history): - got = history._get_time_per_task(EvalTask.JAC, cost_factor=1) - exp = np.array([0, 1, 1, 2, 2, 2]) +def test_get_time_per_task_jac_cost_factor_none(history): + got = history._get_time_per_task(EvalTask.JAC, cost_factor=None) + exp = np.array([0, 2, 0, 2, 0, 0]) assert_array_equal(got, exp) def test_get_time_per_task_fun_and_jac(history): - got = history._get_time_per_task(EvalTask.FUN_AND_JAC, cost_factor=1) - exp = np.array([0, 0, 0, 0, 0, 1]) + got = history._get_time_per_task(EvalTask.FUN_AND_JAC, cost_factor=-0.5) + exp = np.array([0, 0, 0, 0, 0, -0.5]) assert_array_equal(got, exp) -def test_get_time_cost_model(history): +def test_get_time_custom_cost_model(history): cost_model = om.timing.CostModel( fun=0.5, jac=1, fun_and_jac=2, label="test", aggregate_batch_time=sum ) @@ -362,6 +373,30 @@ def test_get_time_cost_model(history): assert_array_equal(got, exp) +def test_get_time_fun_evaluations(history): + got = history._get_time(cost_model=om.timing.fun_evaluations) + exp = np.array([1, 1, 2, 2, 3, 4]) + assert_array_equal(got, exp) + + +def test_get_time_fun_batches(history): + got = history._get_time(cost_model=om.timing.fun_batches) + exp = np.array([1, 1, 2, 2, 3, 4]) + assert_array_equal(got, exp) + + +def test_get_time_fun_batches_with_batch_data(history_with_batch_data): + got = history_with_batch_data._get_time(cost_model=om.timing.fun_batches) + 
exp = np.array([1, 1, 2, 2, 3, 3]) + assert_array_equal(got, exp) + + +def test_get_time_evaluation_time(history): + got = history._get_time(cost_model=om.timing.evaluation_time) + exp = np.array([1, 3, 4, 6, 7, 9]) + assert_array_equal(got, exp) + + def test_get_time_wall_time(history): got = history._get_time(cost_model="wall_time") exp = np.array([1, 4, 6, 9, 11, 14]) @@ -381,7 +416,7 @@ def test_stop_time_property(history): def test_batches_property(history): - assert history.batches == [0, 0, 1, 1, 2, 2] + assert history.batches == [0, 1, 2, 3, 4, 5] # Tasks @@ -466,3 +501,25 @@ def test_task_as_categorical(): got = _task_as_categorical(task) assert got.tolist() == ["fun", "jac", "fun_and_jac"] assert isinstance(got.dtype, pd.CategoricalDtype) + + +def test_get_batch_start(): + batches = [0, 0, 1, 1, 1, 2, 2, 3] + got = _get_batch_start(batches) + assert got == [0, 2, 5, 7] + + +def test_batch_apply_sum(): + data = np.array([0, 1, 2, 3, 4]) + batch_ids = [0, 0, 1, 1, 2] + exp = np.array([1, 0, 5, 0, 4]) + got = _batch_apply(data, batch_ids, sum) + assert_array_equal(exp, got) + + +def test_batch_apply_max(): + data = np.array([0, 1, 2, 3, 4]) + batch_ids = [0, 0, 1, 1, 2] + exp = np.array([1, 0, 3, 0, 4]) + got = _batch_apply(data, batch_ids, max) + assert_array_equal(exp, got) From e08934707251855b7d6452fb554ee1ca52943074 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Tue, 3 Dec 2024 17:57:12 +0100 Subject: [PATCH 21/24] Use plural --- src/optimagic/optimization/history.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index dd9bad85b..1ecc54f0e 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -403,17 +403,17 @@ def _batch_apply( other values of that batch are set to zero. 
""" - batch_start = _get_batch_start(batch_ids) - batch_stop = [*batch_start, len(data)][1:] + batch_starts = _get_batch_start(batch_ids) + batch_stops = [*batch_starts, len(data)][1:] - batch_result = [] + batch_results = [] for batch, (start, stop) in zip( - batch_ids, zip(batch_start, batch_stop, strict=False), strict=False + batch_ids, zip(batch_starts, batch_stops, strict=False), strict=False ): try: batch_data = data[start:stop] reduced = func(batch_data) - batch_result.append(reduced) + batch_results.append(reduced) except Exception as e: msg = ( f"Calling function {func.__name__} on batch {batch} of the History " @@ -423,7 +423,7 @@ def _batch_apply( raise ValueError(msg) from e out = np.zeros_like(data) - out[batch_start] = batch_result + out[batch_starts] = batch_results return out From bd50a5da398c44043093aa0de23879da47dba1d4 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Thu, 5 Dec 2024 11:14:09 +0100 Subject: [PATCH 22/24] Add more test cases for invalid user funcs; check that batch time aggregator returns scalars --- src/optimagic/optimization/history.py | 34 +++++++++++------ src/optimagic/timing.py | 7 ++++ tests/optimagic/optimization/test_history.py | 39 +++++++++++++++++--- tests/optimagic/test_timing.py | 14 +++++++ 4 files changed, 77 insertions(+), 17 deletions(-) create mode 100644 tests/optimagic/test_timing.py diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 1ecc54f0e..4016d4152 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -219,12 +219,12 @@ def _get_time( ) time = fun_time + jac_time + fun_and_jac_time - batch_time = _batch_apply( + batch_aware_time = _apply_to_batch( data=time, batch_ids=self.batches, func=cost_model.aggregate_batch_time, ) - return np.cumsum(batch_time) + return np.cumsum(batch_aware_time) def _get_time_per_task( self, task: EvalTask, cost_factor: float | None @@ -383,7 +383,7 @@ def _task_as_categorical(task: 
list[EvalTask]) -> pd.Categorical: ) -def _batch_apply( +def _apply_to_batch( data: NDArray[np.float64], batch_ids: list[int], func: Callable[[Iterable[float]], float], @@ -392,10 +392,10 @@ def _batch_apply( Args: data: 1d array with data. - batch_ids: A list whose length is equal to the size of data. Values need to be - sorted and can be repeated. + batch_ids: A list with batch ids whose length is equal to the size of data. + Values need to be sorted and can be repeated. func: A reduction function that takes an iterable of floats as input (e.g., a - numpy array or a list) and returns a scalar. + numpy.ndarray or list) and returns a scalar. Returns: The transformed data. Has the same length as data. For each batch, the result of @@ -410,25 +410,37 @@ def _batch_apply( for batch, (start, stop) in zip( batch_ids, zip(batch_starts, batch_stops, strict=False), strict=False ): + batch_data = data[start:stop] + try: - batch_data = data[start:stop] reduced = func(batch_data) - batch_results.append(reduced) except Exception as e: msg = ( f"Calling function {func.__name__} on batch {batch} of the History " - f"History raised an Exception. Please verify that {func.__name__} is " - "properly defined." + f"raised an Exception. Please verify that {func.__name__} is " + "well-defined and takes a list of floats as input and returns a scalar." ) raise ValueError(msg) from e + try: + assert np.isscalar(reduced) + except AssertionError: + msg = ( + f"Function {func.__name__} did not return a scalar for batch {batch}. " + f"Please verify that {func.__name__} returns a scalar when called on a " + "list of floats." + ) + raise ValueError(msg) from None + + batch_results.append(reduced) + out = np.zeros_like(data) out[batch_starts] = batch_results return out def _get_batch_start(batch_ids: list[int]) -> list[int]: - """Get start indices of batch. + """Get start indices of batches. This function assumes that batch_ids non-empty and sorted. 
diff --git a/src/optimagic/timing.py b/src/optimagic/timing.py index db83a76d2..a9fbe7d88 100644 --- a/src/optimagic/timing.py +++ b/src/optimagic/timing.py @@ -10,6 +10,13 @@ class CostModel: label: str aggregate_batch_time: Callable[[Iterable[float]], float] + def __post_init__(self) -> None: + if not callable(self.aggregate_batch_time): + raise ValueError( + "aggregate_batch_time must be a callable, got " + f"{self.aggregate_batch_time}" + ) + evaluation_time = CostModel( fun=None, diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index ab92b88a2..bd7137922 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -10,7 +10,7 @@ from optimagic.optimization.history import ( History, HistoryEntry, - _batch_apply, + _apply_to_batch, _calculate_monotone_sequence, _get_batch_start, _get_flat_param_names, @@ -403,6 +403,13 @@ def test_get_time_wall_time(history): assert_array_equal(got, exp) +def test_get_time_invalid_cost_model(history): + with pytest.raises( + ValueError, match="cost_model must be a CostModel or 'wall_time'." 
+ ): + history._get_time(cost_model="invalid") + + def test_start_time_property(history): assert history.start_time == [0, 2, 5, 7, 10, 12] @@ -465,12 +472,18 @@ def test_get_flat_params_fast_path(): assert_array_equal(got, exp) -def test_get_flat_param_names(): +def test_get_flat_param_names_pytree(): got = _get_flat_param_names(param={"a": 0, "b": [0, 1], "c": np.arange(2)}) exp = ["a", "b_0", "b_1", "c_0", "c_1"] assert got == exp +def test_get_flat_param_names_fast_path(): + got = _get_flat_param_names(param=np.arange(2)) + exp = ["0", "1"] + assert got == exp + + def test_calculate_monotone_sequence_maximize(): sequence = [0, 1, 0, 0, 2, 10, 0] exp = [0, 1, 1, 1, 2, 10, 10] @@ -509,17 +522,31 @@ def test_get_batch_start(): assert got == [0, 2, 5, 7] -def test_batch_apply_sum(): +def test_apply_to_batch_sum(): data = np.array([0, 1, 2, 3, 4]) batch_ids = [0, 0, 1, 1, 2] exp = np.array([1, 0, 5, 0, 4]) - got = _batch_apply(data, batch_ids, sum) + got = _apply_to_batch(data, batch_ids, sum) assert_array_equal(exp, got) -def test_batch_apply_max(): +def test_apply_to_batch_max(): data = np.array([0, 1, 2, 3, 4]) batch_ids = [0, 0, 1, 1, 2] exp = np.array([1, 0, 3, 0, 4]) - got = _batch_apply(data, batch_ids, max) + got = _apply_to_batch(data, batch_ids, max) assert_array_equal(exp, got) + + +def test_apply_to_batch_broken_func(): + data = np.array([0, 1, 2, 3, 4]) + batch_ids = [0, 0, 1, 1, 2] + with pytest.raises(ValueError, match="Calling function on batch [0, 0]"): + _apply_to_batch(data, batch_ids, func=lambda _: 1 / 0) + + +def test_apply_to_batch_func_with_non_scalar_return(): + data = np.array([0, 1, 2, 3, 4]) + batch_ids = [0, 0, 1, 1, 2] + with pytest.raises(ValueError, match="Function did not return a scalar"): + _apply_to_batch(data, batch_ids, func=lambda _list: _list) diff --git a/tests/optimagic/test_timing.py b/tests/optimagic/test_timing.py new file mode 100644 index 000000000..fd2edfc3c --- /dev/null +++ b/tests/optimagic/test_timing.py @@ -0,0 
+1,14 @@ +import pytest + +from optimagic import timing + + +def test_invalid_aggregate_batch_time(): + with pytest.raises(ValueError, match="aggregate_batch_time must be a callable"): + timing.CostModel( + fun=None, + jac=None, + fun_and_jac=None, + label="label", + aggregate_batch_time="Not callable", + ) From 0c5ead31ccb0756ce0c9ced5f993219acd5dd729 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Thu, 5 Dec 2024 12:00:37 +0100 Subject: [PATCH 23/24] Some minor changes after first round of own review --- src/optimagic/optimization/history.py | 19 ++++++++++--------- src/optimagic/optimization/process_results.py | 4 ++-- src/optimagic/visualization/history_plots.py | 11 +++++++---- tests/optimagic/optimization/test_history.py | 10 +++++----- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 4016d4152..2b0215722 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -93,7 +93,7 @@ def _get_next_batch_id(self) -> int: return batch # ================================================================================== - # Properties to access the history + # Properties and methods to access the history # ================================================================================== # Function data, function value, and monotone function value @@ -243,10 +243,11 @@ def _get_time_per_task( """ dummy_task = np.array([1 if t == task else 0 for t in self.task]) + factor: float | NDArray[np.float64] if cost_factor is None: - factor: float | NDArray[np.float64] = np.array( - self.stop_time, dtype=np.float64 - ) - np.array(self.start_time, dtype=np.float64) + factor = np.array(self.stop_time, dtype=np.float64) - np.array( + self.start_time, dtype=np.float64 + ) else: factor = cost_factor @@ -342,16 +343,16 @@ def _calculate_monotone_sequence( sequence: list[float | None], direction: Direction ) -> NDArray[np.float64]: sequence_arr = 
np.array(sequence, dtype=np.float64) # converts None to nan - none_mask = np.isnan(sequence_arr) + nan_mask = np.isnan(sequence_arr) if direction == Direction.MINIMIZE: - sequence_arr[none_mask] = np.inf + sequence_arr[nan_mask] = np.inf out = np.minimum.accumulate(sequence_arr) elif direction == Direction.MAXIMIZE: - sequence_arr[none_mask] = -np.inf + sequence_arr[nan_mask] = -np.inf out = np.maximum.accumulate(sequence_arr) - out[none_mask] = np.nan + out[nan_mask] = np.nan return out @@ -404,7 +405,7 @@ def _apply_to_batch( """ batch_starts = _get_batch_start(batch_ids) - batch_stops = [*batch_starts, len(data)][1:] + batch_stops = [*batch_starts[1:], len(data)] batch_results = [] for batch, (start, stop) in zip( diff --git a/src/optimagic/optimization/process_results.py b/src/optimagic/optimization/process_results.py index 8da70059c..64d764174 100644 --- a/src/optimagic/optimization/process_results.py +++ b/src/optimagic/optimization/process_results.py @@ -112,8 +112,8 @@ def process_multistart_result( direction=extra_fields.direction, fun=[opt.fun for opt in info.local_optima], params=[opt.params for opt in info.local_optima], - start_time=[np.nan for _ in info.local_optima], - stop_time=[np.nan for _ in info.local_optima], + start_time=len(info.local_optima) * [np.nan], + stop_time=len(info.local_optima) * [np.nan], batches=list(range(len(info.local_optima))), task=len(info.local_optima) * [EvalTask.FUN], ) diff --git a/src/optimagic/visualization/history_plots.py b/src/optimagic/visualization/history_plots.py index 73069c522..22d98bf71 100644 --- a/src/optimagic/visualization/history_plots.py +++ b/src/optimagic/visualization/history_plots.py @@ -407,10 +407,11 @@ def _extract_plotting_data_from_database(res, stack_multistart, show_exploration fun=_history["fun"], params=_history["params"], start_time=_history["time"], - # TODO: This needs to be updated + # TODO (@janosg): Retrieve that information from `hist` once it is available. 
+ # https://github.com/optimagic-dev/optimagic/pull/553 stop_time=len(_history["fun"]) * [None], - batches=len(_history["fun"]) * [None], task=len(_history["fun"]) * [None], + batches=list(range(len(_history["fun"]))), ) data = { @@ -449,8 +450,10 @@ def _get_stacked_local_histories(local_histories, direction, history=None): fun=stacked["criterion"], params=stacked["params"], start_time=stacked["runtime"], - # TODO: This needs to be fixed + # TODO (@janosg): Retrieve that information from `hist` once it is available + # for the IterationHistory. + # https://github.com/optimagic-dev/optimagic/pull/553 stop_time=len(stacked["criterion"]) * [None], task=len(stacked["criterion"]) * [None], - batches=len(stacked["criterion"]) * [None], + batches=list(range(len(stacked["criterion"]))), ) diff --git a/tests/optimagic/optimization/test_history.py b/tests/optimagic/optimization/test_history.py index bd7137922..72cdbb4a2 100644 --- a/tests/optimagic/optimization/test_history.py +++ b/tests/optimagic/optimization/test_history.py @@ -169,7 +169,7 @@ def history(history_data): @pytest.fixture -def history_with_batch_data(history_data): +def history_parallel(history_data): data = history_data.copy() data["batches"] = [0, 0, 1, 1, 2, 2] return History(direction=Direction.MINIMIZE, **data) @@ -223,8 +223,8 @@ def test_history_fun_data_with_fun_evaluations_cost_model_and_monotone(history): assert_frame_equal(got, exp, check_dtype=False, check_categorical=False) -def test_history_fun_data_with_fun_batches_cost_model(history_with_batch_data): - got = history_with_batch_data.fun_data( +def test_history_fun_data_with_fun_batches_cost_model(history_parallel): + got = history_parallel.fun_data( cost_model=om.timing.fun_batches, monotone=False, ) @@ -385,8 +385,8 @@ def test_get_time_fun_batches(history): assert_array_equal(got, exp) -def test_get_time_fun_batches_with_batch_data(history_with_batch_data): - got = history_with_batch_data._get_time(cost_model=om.timing.fun_batches) +def 
test_get_time_fun_batches_parallel(history_parallel): + got = history_parallel._get_time(cost_model=om.timing.fun_batches) exp = np.array([1, 1, 2, 2, 3, 3]) assert_array_equal(got, exp) From 33c5a18bb1ae8e567a4fc1d35ab9cba940964811 Mon Sep 17 00:00:00 2001 From: Tim Mensinger Date: Thu, 5 Dec 2024 12:05:47 +0100 Subject: [PATCH 24/24] Add explicit assumption to _apply_to_batch docstring --- src/optimagic/optimization/history.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/optimagic/optimization/history.py b/src/optimagic/optimization/history.py index 2b0215722..14df3a67e 100644 --- a/src/optimagic/optimization/history.py +++ b/src/optimagic/optimization/history.py @@ -391,6 +391,8 @@ def _apply_to_batch( ) -> NDArray[np.float64]: """Apply a reduction operator on batches of data. + This function assumes that batch_ids is non-empty and sorted. + Args: data: 1d array with data. batch_ids: A list with batch ids whose length is equal to the size of data.