From 23b9c320916c4a027e2a1eefd3501410ebf41cc7 Mon Sep 17 00:00:00 2001 From: Paschal Igusti Date: Mon, 9 Dec 2024 07:06:06 -0800 Subject: [PATCH] Move get_improvement_over_baseline to the BestPointMixin (#3156) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/3156 Differential Revision: D66472613 --- .../tests/test_torch_moo_modelbridge.py | 2 +- ax/plot/parallel_coordinates.py | 3 +- ax/plot/tests/test_traces.py | 2 +- ax/service/ax_client.py | 2 +- ax/service/scheduler.py | 65 --- ax/service/tests/scheduler_test_utils.py | 36 +- ax/service/tests/test_best_point_utils.py | 100 ++++ ax/service/tests/test_report_utils.py | 102 +--- ax/service/utils/best_point_mixin.py | 74 +++ ax/service/utils/best_point_utils.py | 470 ++++++++++++++++++ ax/service/utils/report_utils.py | 450 +---------------- sphinx/source/service.rst | 5 + tutorials/external_generation_node.ipynb | 2 +- tutorials/modular_botax.ipynb | 2 +- tutorials/multiobjective_optimization.ipynb | 2 +- tutorials/saasbo_nehvi.ipynb | 2 +- tutorials/scheduler.ipynb | 2 +- tutorials/submitit.ipynb | 2 +- 18 files changed, 685 insertions(+), 638 deletions(-) create mode 100644 ax/service/utils/best_point_utils.py diff --git a/ax/modelbridge/tests/test_torch_moo_modelbridge.py b/ax/modelbridge/tests/test_torch_moo_modelbridge.py index 939336d8fbf..e70976ac14d 100644 --- a/ax/modelbridge/tests/test_torch_moo_modelbridge.py +++ b/ax/modelbridge/tests/test_torch_moo_modelbridge.py @@ -37,7 +37,7 @@ infer_objective_thresholds, pareto_frontier_evaluator, ) -from ax.service.utils.report_utils import exp_to_df +from ax.service.utils.best_point_utils import exp_to_df from ax.utils.common.random import set_rng_seed from ax.utils.common.testutils import TestCase from ax.utils.common.typeutils import checked_cast diff --git a/ax/plot/parallel_coordinates.py b/ax/plot/parallel_coordinates.py index fcbb93e9917..907216a17e5 100644 --- a/ax/plot/parallel_coordinates.py +++ b/ax/plot/parallel_coordinates.py @@ -10,7 +10,8 @@ import pandas as pd from ax.core.experiment import Experiment from ax.plot.base import AxPlotConfig, AxPlotTypes -from ax.service.utils.report_utils import _get_shortest_unique_suffix_dict, exp_to_df +from ax.service.utils.best_point_utils import exp_to_df +from ax.service.utils.report_utils import _get_shortest_unique_suffix_dict from plotly import express as px, graph_objs as go diff --git a/ax/plot/tests/test_traces.py b/ax/plot/tests/test_traces.py index c69c7b9417a..37a27378909 100644 --- a/ax/plot/tests/test_traces.py +++ b/ax/plot/tests/test_traces.py @@ -15,7 +15,7 @@ optimization_trace_single_method_plotly, plot_objective_value_vs_trial_index, ) -from ax.service.utils.report_utils import exp_to_df +from ax.service.utils.best_point_utils import exp_to_df from ax.utils.common.testutils import TestCase from ax.utils.testing.core_stubs import get_branin_experiment from ax.utils.testing.mock import mock_botorch_optimize diff --git a/ax/service/ax_client.py b/ax/service/ax_client.py index 4e48cddb621..b9ce5ee8b2b 100644 --- a/ax/service/ax_client.py +++ b/ax/service/ax_client.py @@ -68,12 +68,12 @@ from ax.plot.trace import optimization_trace_single_method from ax.service.utils.analysis_base import AnalysisBase from ax.service.utils.best_point_mixin import BestPointMixin +from ax.service.utils.best_point_utils import exp_to_df from ax.service.utils.instantiation import ( FixedFeatures, InstantiationBase, ObjectiveProperties, ) -from ax.service.utils.report_utils import exp_to_df from 
ax.service.utils.with_db_settings_base import DBSettings from ax.storage.json_store.decoder import ( generation_strategy_from_json, diff --git a/ax/service/scheduler.py b/ax/service/scheduler.py index 1bbed5705bf..502cd523586 100644 --- a/ax/service/scheduler.py +++ b/ax/service/scheduler.py @@ -1021,71 +1021,6 @@ def summarize_final_result(self) -> OptimizationResult: """ return OptimizationResult() - def get_improvement_over_baseline( - self, - baseline_arm_name: str | None = None, - ) -> float: - """Returns the scalarized improvement over baseline, if applicable. - - Returns: - For Single Objective cases, returns % improvement of objective. - Positive indicates improvement over baseline. Negative indicates regression. - For Multi Objective cases, throws NotImplementedError - """ - if self.experiment.is_moo_problem: - raise NotImplementedError( - "`get_improvement_over_baseline` not yet implemented" - + " for multi-objective problems." - ) - if not baseline_arm_name: - raise UserInputError( - "`get_improvement_over_baseline` missing required parameter: " - + f"{baseline_arm_name=}, " - ) - - optimization_config = self.experiment.optimization_config - if not optimization_config: - raise ValueError("No optimization config found.") - - objective_metric_name = optimization_config.objective.metric.name - - # get the baseline trial - data = self.experiment.lookup_data().df - data = data[data["arm_name"] == baseline_arm_name] - if len(data) == 0: - raise UserInputError( - "`get_improvement_over_baseline`" - " could not find baseline arm" - f" `{baseline_arm_name}` in the experiment data." - ) - data = data[data["metric_name"] == objective_metric_name] - baseline_value = data.iloc[0]["mean"] - - # Find objective value of the best trial - idx, param, best_arm = none_throws( - self.get_best_trial( - optimization_config=optimization_config, use_model_predictions=False - ) - ) - best_arm = none_throws(best_arm) - best_obj_value = best_arm[0][objective_metric_name] - - def percent_change(x: float, y: float, minimize: bool) -> float: - if x == 0: - raise ZeroDivisionError( - "Cannot compute percent improvement when denom is zero" - ) - percent_change = (y - x) / abs(x) * 100 - if minimize: - percent_change = -percent_change - return percent_change - - return percent_change( - x=baseline_value, - y=best_obj_value, - minimize=optimization_config.objective.minimize, - ) - def _check_if_failure_rate_exceeded(self, force_check: bool = False) -> bool: """Checks if the failure rate (set in scheduler options) has been exceeded at any point during the optimization. 
diff --git a/ax/service/tests/scheduler_test_utils.py b/ax/service/tests/scheduler_test_utils.py index f43e8f14a9d..18de330a8ef 100644 --- a/ax/service/tests/scheduler_test_utils.py +++ b/ax/service/tests/scheduler_test_utils.py @@ -2197,6 +2197,8 @@ def test_get_improvement_over_baseline(self) -> None: scheduler.experiment.trials[0].lookup_data().df["arm_name"].iloc[0] ) percent_improvement = scheduler.get_improvement_over_baseline( + experiment=scheduler.experiment, + generation_strategy=scheduler.standard_generation_strategy, baseline_arm_name=first_trial_name, ) @@ -2209,11 +2211,7 @@ def test_get_improvement_over_baseline_robustness_not_implemented(self) -> None: self.branin_experiment.optimization_config = ( get_branin_multi_objective_optimization_config() ) - - gs = self._get_generation_strategy_strategy_for_test( - experiment=self.branin_experiment, - generation_strategy=self.sobol_MBM_GS, - ) + gs = self.sobol_MBM_GS scheduler = Scheduler( experiment=self.branin_experiment, @@ -2227,6 +2225,8 @@ def test_get_improvement_over_baseline_robustness_not_implemented(self) -> None: with self.assertRaises(NotImplementedError): scheduler.get_improvement_over_baseline( + experiment=scheduler.experiment, + generation_strategy=scheduler.standard_generation_strategy, baseline_arm_name=None, ) @@ -2236,10 +2236,7 @@ def test_get_improvement_over_baseline_robustness_user_input_error(self) -> None experiment.name = f"{self.branin_experiment.name}_but_moo" experiment.runner = self.runner - gs = self._get_generation_strategy_strategy_for_test( - experiment=experiment, - generation_strategy=self.two_sobol_steps_GS, - ) + gs = self.two_sobol_steps_GS scheduler = Scheduler( experiment=self.branin_experiment, # Has runner and metrics. generation_strategy=gs, @@ -2251,8 +2248,10 @@ def test_get_improvement_over_baseline_robustness_user_input_error(self) -> None db_settings=self.db_settings_if_always_needed, ) - with self.assertRaises(UserInputError): + with self.assertRaises(ValueError): scheduler.get_improvement_over_baseline( + experiment=scheduler.experiment, + generation_strategy=scheduler.standard_generation_strategy, baseline_arm_name=None, ) @@ -2267,19 +2266,20 @@ def test_get_improvement_over_baseline_robustness_user_input_error(self) -> None scheduler.experiment = exp_copy with self.assertRaises(ValueError): - scheduler.get_improvement_over_baseline(baseline_arm_name="baseline") + scheduler.get_improvement_over_baseline( + experiment=scheduler.experiment, + generation_strategy=scheduler.standard_generation_strategy, + baseline_arm_name="baseline", + ) def test_get_improvement_over_baseline_no_baseline(self) -> None: """Test that get_improvement_over_baseline returns UserInputError when baseline is not found in data.""" n_total_trials = 8 - gs = self._get_generation_strategy_strategy_for_test( - experiment=self.branin_experiment, - generation_strategy=self.two_sobol_steps_GS, - ) - + experiment = self.branin_experiment + gs = self.two_sobol_steps_GS scheduler = Scheduler( - experiment=self.branin_experiment, # Has runner and metrics. + experiment=experiment, # Has runner and metrics. 
generation_strategy=gs, options=SchedulerOptions( total_trials=n_total_trials, @@ -2293,6 +2293,8 @@ def test_get_improvement_over_baseline_no_baseline(self) -> None: with self.assertRaises(UserInputError): scheduler.get_improvement_over_baseline( + experiment=experiment, + generation_strategy=gs, baseline_arm_name="baseline_arm_not_in_data", ) diff --git a/ax/service/tests/test_best_point_utils.py b/ax/service/tests/test_best_point_utils.py index 9672db9dcff..135779cf607 100644 --- a/ax/service/tests/test_best_point_utils.py +++ b/ax/service/tests/test_best_point_utils.py @@ -6,6 +6,7 @@ # pyre-strict +import copy import random from unittest.mock import MagicMock, patch @@ -14,7 +15,9 @@ from ax.core.arm import Arm from ax.core.batch_trial import BatchTrial from ax.core.data import Data +from ax.core.experiment import Experiment from ax.core.generator_run import GeneratorRun +from ax.core.metric import Metric from ax.core.objective import ScalarizedObjective from ax.core.optimization_config import OptimizationConfig from ax.core.outcome_constraint import OutcomeConstraint @@ -32,10 +35,13 @@ get_best_raw_objective_point, logger as best_point_logger, ) +from ax.service.utils.best_point_utils import select_baseline_arm +from ax.service.utils.report_utils import BASELINE_ARM_NAME from ax.utils.common.testutils import TestCase from ax.utils.testing.core_stubs import ( get_branin_experiment, get_branin_metric, + get_branin_search_space, get_experiment_with_observations, get_sobol, ) @@ -556,6 +562,100 @@ def test_is_row_feasible(self) -> None: df.index, feasible_series.index, check_names=False ) + def test_compare_to_baseline_select_baseline_arm(self) -> None: + OBJECTIVE_METRIC = "objective" + true_obj_metric = Metric(name=OBJECTIVE_METRIC, lower_is_better=True) + experiment = Experiment( + search_space=get_branin_search_space(), + tracking_metrics=[true_obj_metric], + ) + + # specified baseline + data = [ + { + "trial_index": 0, + "arm_name": "m_0", + OBJECTIVE_METRIC: 0.2, + }, + { + "trial_index": 1, + "arm_name": BASELINE_ARM_NAME, + OBJECTIVE_METRIC: 0.2, + }, + { + "trial_index": 2, + "arm_name": "status_quo", + OBJECTIVE_METRIC: 0.2, + }, + ] + arms_df = pd.DataFrame(data) + self.assertEqual( + select_baseline_arm( + experiment=experiment, + arms_df=arms_df, + baseline_arm_name=BASELINE_ARM_NAME, + ), + (BASELINE_ARM_NAME, False), + ) + + # specified baseline arm not in trial + wrong_baseline_name = "wrong_baseline_name" + with self.assertRaisesRegex( + ValueError, + "compare_to_baseline: baseline row: .*" + " not found in arms", + ): + select_baseline_arm( + experiment=experiment, + arms_df=arms_df, + baseline_arm_name=wrong_baseline_name, + ) + + # status quo baseline arm + experiment_with_status_quo = copy.deepcopy(experiment) + experiment_with_status_quo.status_quo = Arm( + name="status_quo", + parameters={"x1": 0, "x2": 0}, + ) + self.assertEqual( + select_baseline_arm( + experiment=experiment_with_status_quo, + arms_df=arms_df, + baseline_arm_name=None, + ), + ("status_quo", False), + ) + # first arm from trials + custom_arm = Arm(name="m_0", parameters={"x1": 0.1, "x2": 0.2}) + experiment.new_trial().add_arm(custom_arm) + self.assertEqual( + select_baseline_arm( + experiment=experiment, + arms_df=arms_df, + baseline_arm_name=None, + ), + ("m_0", True), + ) + + # none selected + experiment_with_no_valid_baseline = Experiment( + search_space=get_branin_search_space(), + tracking_metrics=[true_obj_metric], + ) + experiment_with_no_valid_baseline.status_quo = Arm( + name="not 
found", + parameters={"x1": 0, "x2": 0}, + ) + custom_arm = Arm(name="also not found", parameters={"x1": 0.1, "x2": 0.2}) + experiment_with_no_valid_baseline.new_trial().add_arm(custom_arm) + with self.assertRaisesRegex( + ValueError, "compare_to_baseline: could not find valid baseline arm" + ): + select_baseline_arm( + experiment=experiment_with_no_valid_baseline, + arms_df=arms_df, + baseline_arm_name=None, + ) + def _repeat_elements(list_to_replicate: list[bool], n_repeats: int) -> pd.Series: return pd.Series([item for item in list_to_replicate for _ in range(n_repeats)]) diff --git a/ax/service/tests/test_report_utils.py b/ax/service/tests/test_report_utils.py index 653693166b2..d933570c5c2 100644 --- a/ax/service/tests/test_report_utils.py +++ b/ax/service/tests/test_report_utils.py @@ -6,7 +6,6 @@ # pyre-strict -import copy import itertools from collections import namedtuple from logging import INFO, WARN @@ -27,6 +26,7 @@ from ax.modelbridge.generation_strategy import GenerationStrategy from ax.modelbridge.registry import Models from ax.service.scheduler import Scheduler +from ax.service.utils.best_point_utils import exp_to_df from ax.service.utils.report_utils import ( _format_comparison_string, _get_cross_validation_plots, @@ -39,12 +39,10 @@ BASELINE_ARM_NAME, compare_to_baseline, compute_maximum_map_values, - exp_to_df, Experiment, FEASIBLE_COL_NAME, get_standard_plots, plot_feature_importance_by_feature_plotly, - select_baseline_arm, warn_if_unpredictable_metrics, ) from ax.service.utils.scheduler_options import SchedulerOptions @@ -84,7 +82,7 @@ class ReportUtilsTest(TestCase): @patch( - "ax.service.utils.report_utils._merge_results_if_no_duplicates", + "ax.service.utils.best_point_utils._merge_results_if_no_duplicates", autospec=True, return_value=pd.DataFrame( [ @@ -109,7 +107,7 @@ def test_exp_to_df_row_ordering(self, _) -> None: self.assertEqual(row["trial_index"], idx) @patch( - "ax.service.utils.report_utils._merge_results_if_no_duplicates", + "ax.service.utils.best_point_utils._merge_results_if_no_duplicates", autospec=True, return_value=pd.DataFrame( [ @@ -1176,100 +1174,6 @@ def test_compare_to_baseline_equal(self) -> None: self.assertIsNone(result) - def test_compare_to_baseline_select_baseline_arm(self) -> None: - OBJECTIVE_METRIC = "objective" - true_obj_metric = Metric(name=OBJECTIVE_METRIC, lower_is_better=True) - experiment = Experiment( - search_space=get_branin_search_space(), - tracking_metrics=[true_obj_metric], - ) - - # specified baseline - data = [ - { - "trial_index": 0, - "arm_name": "m_0", - OBJECTIVE_METRIC: 0.2, - }, - { - "trial_index": 1, - "arm_name": BASELINE_ARM_NAME, - OBJECTIVE_METRIC: 0.2, - }, - { - "trial_index": 2, - "arm_name": "status_quo", - OBJECTIVE_METRIC: 0.2, - }, - ] - arms_df = pd.DataFrame(data) - self.assertEqual( - select_baseline_arm( - experiment=experiment, - arms_df=arms_df, - baseline_arm_name=BASELINE_ARM_NAME, - ), - (BASELINE_ARM_NAME, False), - ) - - # specified baseline arm not in trial - wrong_baseline_name = "wrong_baseline_name" - with self.assertRaisesRegex( - ValueError, - "compare_to_baseline: baseline row: .*" + " not found in arms", - ): - select_baseline_arm( - experiment=experiment, - arms_df=arms_df, - baseline_arm_name=wrong_baseline_name, - ) - - # status quo baseline arm - experiment_with_status_quo = copy.deepcopy(experiment) - experiment_with_status_quo.status_quo = Arm( - name="status_quo", - parameters={"x1": 0, "x2": 0}, - ) - self.assertEqual( - select_baseline_arm( - 
experiment=experiment_with_status_quo, - arms_df=arms_df, - baseline_arm_name=None, - ), - ("status_quo", False), - ) - # first arm from trials - custom_arm = Arm(name="m_0", parameters={"x1": 0.1, "x2": 0.2}) - experiment.new_trial().add_arm(custom_arm) - self.assertEqual( - select_baseline_arm( - experiment=experiment, - arms_df=arms_df, - baseline_arm_name=None, - ), - ("m_0", True), - ) - - # none selected - experiment_with_no_valid_baseline = Experiment( - search_space=get_branin_search_space(), - tracking_metrics=[true_obj_metric], - ) - experiment_with_no_valid_baseline.status_quo = Arm( - name="not found", - parameters={"x1": 0, "x2": 0}, - ) - custom_arm = Arm(name="also not found", parameters={"x1": 0.1, "x2": 0.2}) - experiment_with_no_valid_baseline.new_trial().add_arm(custom_arm) - with self.assertRaisesRegex( - ValueError, "compare_to_baseline: could not find valid baseline arm" - ): - select_baseline_arm( - experiment=experiment_with_no_valid_baseline, - arms_df=arms_df, - baseline_arm_name=None, - ) - def test_warn_if_unpredictable_metrics(self) -> None: expected_msg = ( "The following metric(s) are behaving unpredictably and may be noisy or " diff --git a/ax/service/utils/best_point_mixin.py b/ax/service/utils/best_point_mixin.py index 5996bac164c..82380df14b5 100644 --- a/ax/service/utils/best_point_mixin.py +++ b/ax/service/utils/best_point_mixin.py @@ -21,6 +21,7 @@ OptimizationConfig, ) from ax.core.types import TModelPredictArm, TParameterization +from ax.exceptions.core import UserInputError from ax.modelbridge.generation_strategy import GenerationStrategy from ax.modelbridge.modelbridge_utils import ( extract_objective_thresholds, @@ -43,6 +44,7 @@ extract_Y_from_data, fill_missing_thresholds_from_nadir, ) +from ax.service.utils.best_point_utils import exp_to_df, select_baseline_arm from ax.utils.common.logger import get_logger from ax.utils.common.typeutils import checked_cast from botorch.utils.multi_objective.box_decompositions import DominatedPartitioning @@ -637,3 +639,75 @@ def _get_trace_by_progression( # pyre-fixme[16]: Item `List` of `Union[List[float], ndarray[typing.Any, # np.dtype[typing.Any]]]` has no attribute `squeeze`. return best_observed.tolist(), bins.squeeze(axis=0).tolist() + + def get_improvement_over_baseline( + self, + experiment: Experiment, + generation_strategy: GenerationStrategy, + baseline_arm_name: str | None = None, + ) -> float: + """Returns the scalarized improvement over baseline, if applicable. + + Returns: + For Single Objective cases, returns % improvement of objective. + Positive indicates improvement over baseline. Negative indicates regression. + For Multi Objective cases, throws NotImplementedError + """ + if experiment.is_moo_problem: + raise NotImplementedError( + "`get_improvement_over_baseline` not yet implemented" + + " for multi-objective problems." + ) + + if not baseline_arm_name: + baseline_arm_name, _ = select_baseline_arm( + experiment=experiment, + arms_df=exp_to_df(experiment), + baseline_arm_name=baseline_arm_name, + ) + + optimization_config = experiment.optimization_config + if not optimization_config: + raise ValueError("No optimization config found.") + + objective_metric_name = optimization_config.objective.metric.name + + # get the baseline trial + data = experiment.lookup_data().df + data = data[data["arm_name"] == baseline_arm_name] + if len(data) == 0: + raise UserInputError( + "`get_improvement_over_baseline`" + " could not find baseline arm" + f" `{baseline_arm_name}` in the experiment data." 
+ ) + data = data[data["metric_name"] == objective_metric_name] + baseline_value = data.iloc[0]["mean"] + + # Find objective value of the best trial + idx, param, best_arm = none_throws( + self._get_best_trial( + experiment=experiment, + generation_strategy=generation_strategy, + optimization_config=optimization_config, + use_model_predictions=False, + ) + ) + best_arm = none_throws(best_arm) + best_obj_value = best_arm[0][objective_metric_name] + + def percent_change(x: float, y: float, minimize: bool) -> float: + if x == 0: + raise ZeroDivisionError( + "Cannot compute percent improvement when denom is zero" + ) + percent_change = (y - x) / abs(x) * 100 + if minimize: + percent_change = -percent_change + return percent_change + + return percent_change( + x=baseline_value, + y=best_obj_value, + minimize=optimization_config.objective.minimize, + ) diff --git a/ax/service/utils/best_point_utils.py b/ax/service/utils/best_point_utils.py new file mode 100644 index 00000000000..9229e651edf --- /dev/null +++ b/ax/service/utils/best_point_utils.py @@ -0,0 +1,470 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +# pyre-strict + +from collections.abc import Callable + +from logging import Logger +from typing import Any + +import pandas as pd +from ax.core.base_trial import BaseTrial +from ax.core.experiment import Experiment +from ax.core.generator_run import GeneratorRunType +from ax.core.metric import Metric +from ax.core.multi_type_experiment import MultiTypeExperiment +from ax.exceptions.core import DataRequiredError +from ax.service.utils.best_point import _derel_opt_config_wrapper, _is_row_feasible +from ax.utils.common.logger import get_logger +from pandas import DataFrame +from pyre_extensions import none_throws + +logger: Logger = get_logger(__name__) + +FEASIBLE_COL_NAME = "is_feasible" +BASELINE_ARM_NAME = "baseline_arm" + + +def select_baseline_arm( + experiment: Experiment, + arms_df: pd.DataFrame, + baseline_arm_name: str | None, +) -> tuple[str, bool]: + """ + Choose a baseline arm that is found in arms_df + + Returns: + Tuple: + baseline_arm_name if valid baseline exists + true when baseline selected from first arm of sweep + raise ValueError if no valid baseline found + """ + + if baseline_arm_name: + if arms_df[arms_df["arm_name"] == baseline_arm_name].empty: + raise ValueError( + f"compare_to_baseline: baseline row: {baseline_arm_name=}" + " not found in arms" + ) + return baseline_arm_name, False + + else: + if ( + experiment.status_quo + and not arms_df[ + arms_df["arm_name"] == none_throws(experiment.status_quo).name + ].empty + ): + baseline_arm_name = none_throws(experiment.status_quo).name + return baseline_arm_name, False + + if ( + experiment.trials + and experiment.trials[0].arms + and not arms_df[ + arms_df["arm_name"] == experiment.trials[0].arms[0].name + ].empty + ): + baseline_arm_name = experiment.trials[0].arms[0].name + return baseline_arm_name, True + else: + raise ValueError("compare_to_baseline: could not find valid baseline arm") + + +def exp_to_df( + exp: Experiment, + metrics: list[Metric] | None = None, + run_metadata_fields: list[str] | None = None, + trial_properties_fields: list[str] | None = None, + trial_attribute_fields: list[str] | None = None, + additional_fields_callables: None + | (dict[str, Callable[[Experiment], dict[int, str | float]]]) = None, + always_include_field_columns: bool = 
False,
+    show_relative_metrics: bool = False,
+    **kwargs: Any,
+) -> pd.DataFrame:
+    """Transforms an experiment to a DataFrame with rows keyed by trial_index
+    and arm_name, metrics pivoted into one row. If the pivot results in more than
+    one row per arm (or one row per ``arm * map_keys`` combination if ``map_keys`` are
+    present), results are omitted and a warning is produced. Only supports
+    ``Experiment``.
+
+    Transforms an ``Experiment`` into a ``pd.DataFrame``.
+
+    Args:
+        exp: An ``Experiment`` that may have pending trials.
+        metrics: Override list of metrics to return. Return all metrics if ``None``.
+        run_metadata_fields: Fields to extract from ``trial.run_metadata`` for each
+            trial in ``experiment.trials``. If there are multiple arms per trial, these
+            fields will be replicated across the arms of a trial.
+        trial_properties_fields: Fields to extract from ``trial._properties`` for each
+            trial in ``experiment.trials``. If there are multiple arms per trial, these
+            fields will be replicated across the arms of a trial. Output column names
+            will be prepended with ``"trial_properties_"``.
+        trial_attribute_fields: Fields to extract from trial attributes for each trial
+            in ``experiment.trials``. If there are multiple arms per trial, these
+            fields will be replicated across the arms of a trial.
+        additional_fields_callables: A dictionary of field names to callables, with
+            each being a function from `experiment` to a `trials_dict` of the form
+            {trial_index: value}. An example of a custom callable like this is the
+            function `compute_maximum_map_values`.
+        always_include_field_columns: If `True`, even if all trials have missing
+            values, include field columns anyway. Such columns are by default
+            omitted (False).
+        show_relative_metrics: If `True`, show % metric changes relative to the provided
+            status quo arm. If no status quo arm is provided, log a warning and show
+            raw metric values. If `False`, show raw metric values (default).
+    Returns:
+        DataFrame: A dataframe of inputs, metadata and metrics by trial and arm (and
+            ``map_keys``, if present). If no trials are available, returns an empty
+            dataframe. If no metric outputs are available, returns a dataframe of inputs and
+            metadata. Columns include:
+                * trial_index
+                * arm_name
+                * trial_status
+                * generation_method
+                * any elements of exp.runner.run_metadata_report_keys that are present in
+                    the trial.run_metadata of each trial
+                * one column per metric (named after the metric.name)
+                * one column per parameter (named after the parameter.name)
+    """
+
+    if len(kwargs) > 0:
+        logger.warning(
+            "`kwargs` in exp_to_df is deprecated. Please remove extra arguments."
+        )
+
+    # Only plain `Experiment` instances are supported.
+    if isinstance(exp, MultiTypeExperiment):
+        raise ValueError("Cannot transform MultiTypeExperiments to DataFrames.")
+
+    key_components = ["trial_index", "arm_name"]
+
+    # Get each trial-arm with parameters
+    arms_df = pd.DataFrame(
+        [
+            {
+                "arm_name": arm.name,
+                "trial_index": trial_index,
+                **arm.parameters,
+            }
+            for trial_index, trial in exp.trials.items()
+            for arm in trial.arms
+        ]
+    )
+    # Fetch results.
+    data = exp.lookup_data()
+    results = data.df
+
+    # Filter metrics.
+    if metrics is not None:
+        metric_names = [m.name for m in metrics]
+        results = results[results["metric_name"].isin(metric_names)]
+
+    # Add `FEASIBLE_COL_NAME` column according to constraints if any.
+ if ( + exp.optimization_config is not None + and len(none_throws(exp.optimization_config).all_constraints) > 0 + ): + optimization_config = none_throws(exp.optimization_config) + try: + if any(oc.relative for oc in optimization_config.all_constraints): + optimization_config = _derel_opt_config_wrapper( + optimization_config=optimization_config, + experiment=exp, + ) + results[FEASIBLE_COL_NAME] = _is_row_feasible( + df=results, + optimization_config=optimization_config, + ) + except (KeyError, ValueError, DataRequiredError) as e: + logger.warning(f"Feasibility calculation failed with error: {e}") + + # Calculate relative metrics if `show_relative_metrics` is True. + if show_relative_metrics: + if exp.status_quo is None: + logger.warning( + "No status quo arm found. Showing raw metric values instead of " + "relative metric values." + ) + else: + status_quo_arm_name = exp.status_quo.name + try: + results = _get_relative_results(results, status_quo_arm_name) + except Exception: + logger.warning( + "Failed to calculate relative metrics. Showing raw metric values " + "instead of relative metric values." + ) + + # If arms_df is empty, return empty results (legacy behavior) + if len(arms_df.index) == 0: + if len(results.index) != 0: + raise ValueError( + "exp.lookup_data().df returned more rows than there are experimental " + "arms. This is an inconsistent experimental state. Please report to " + "Ax support." + ) + return results + + # Create key column from key_components + arms_df["trial_index"] = arms_df["trial_index"].astype(int) + + # Add trial status + trials = exp.trials.items() + trial_to_status = {index: trial.status.name for index, trial in trials} + _merge_trials_dict_with_df( + df=arms_df, trials_dict=trial_to_status, column_name="trial_status" + ) + + # Add trial reason for failed or abandoned trials + trial_to_reason = { + index: ( + f"{trial.failed_reason[:15]}..." + if trial.status.is_failed and trial.failed_reason is not None + else ( + f"{trial.abandoned_reason[:15]}..." + if trial.status.is_abandoned and trial.abandoned_reason is not None + else None + ) + ) + for index, trial in trials + } + + _merge_trials_dict_with_df( + df=arms_df, + trials_dict=trial_to_reason, + column_name="reason", + ) + + # Add generation_method, accounting for the generic case that generator_runs is of + # arbitrary length. Repeated methods within a trial are condensed via `set` and an + # empty set will yield "Unknown" as the method. 
+ trial_to_generation_method = { + trial_index: _get_generation_method_str(trial) for trial_index, trial in trials + } + + _merge_trials_dict_with_df( + df=arms_df, + trials_dict=trial_to_generation_method, + column_name="generation_method", + ) + + # Add any trial properties fields to arms_df + if trial_properties_fields is not None: + # add trial._properties fields + for field in trial_properties_fields: + trial_to_properties_field = { + trial_index: ( + trial._properties[field] if field in trial._properties else None + ) + for trial_index, trial in trials + } + _merge_trials_dict_with_df( + df=arms_df, + trials_dict=trial_to_properties_field, + column_name="trial_properties_" + field, + always_include_field_column=always_include_field_columns, + ) + + # Add any run_metadata fields to arms_df + if run_metadata_fields is not None: + # add run_metadata fields + for field in run_metadata_fields: + trial_to_metadata_field = { + trial_index: ( + trial.run_metadata[field] if field in trial.run_metadata else None + ) + for trial_index, trial in trials + } + _merge_trials_dict_with_df( + df=arms_df, + trials_dict=trial_to_metadata_field, + column_name=field, + always_include_field_column=always_include_field_columns, + ) + + # Add any trial attributes fields to arms_df + if trial_attribute_fields is not None: + # add trial attribute fields + for field in trial_attribute_fields: + trial_to_attribute_field = { + trial_index: (getattr(trial, field) if hasattr(trial, field) else None) + for trial_index, trial in trials + } + _merge_trials_dict_with_df( + df=arms_df, + trials_dict=trial_to_attribute_field, + column_name=field, + always_include_field_column=always_include_field_columns, + ) + + # Add additional fields to arms_df + if additional_fields_callables is not None: + for field, func in additional_fields_callables.items(): + trial_to_additional_field = func(exp) + _merge_trials_dict_with_df( + df=arms_df, + trials_dict=trial_to_additional_field, + column_name=field, + always_include_field_column=always_include_field_columns, + ) + + exp_df = _merge_results_if_no_duplicates( + arms_df=arms_df, + results=results, + key_components=key_components, + metrics=metrics or list(exp.metrics.values()), + ) + + exp_df = none_throws(none_throws(exp_df).sort_values(["trial_index"])) + initial_column_order = ( + ["trial_index", "arm_name", "trial_status", "reason", "generation_method"] + + (run_metadata_fields or []) + + (trial_properties_fields or []) + + ([FEASIBLE_COL_NAME] if FEASIBLE_COL_NAME in exp_df.columns else []) + ) + for column_name in reversed(initial_column_order): + if column_name in exp_df.columns: + exp_df.insert(0, column_name, exp_df.pop(column_name)) + return exp_df.reset_index(drop=True) + + +def _get_relative_results( + results_df: pd.DataFrame, status_quo_arm_name: str +) -> pd.DataFrame: + """Returns a dataframe with relative results, i.e. % change in metric values + relative to the status quo arm. + """ + baseline_df = results_df[results_df["arm_name"] == status_quo_arm_name] + relative_results_df = pd.merge( + results_df, + baseline_df[["metric_name", "mean"]], + on="metric_name", + suffixes=("", "_baseline"), + ) + relative_results_df["mean"] = ( + 1.0 * relative_results_df["mean"] / relative_results_df["mean_baseline"] - 1.0 + ) * 100.0 + relative_results_df["metric_name"] = relative_results_df["metric_name"] + "_%CH" + return relative_results_df + + +def _merge_trials_dict_with_df( + df: pd.DataFrame, + # pyre-fixme[2]: Parameter annotation cannot contain `Any`. 
+    trials_dict: dict[int, Any],
+    column_name: str,
+    always_include_field_column: bool = False,
+) -> None:
+    """Add a column ``column_name`` to a DataFrame ``df`` containing a column
+    ``trial_index``. Each value of the new column is given by the element of
+    ``trials_dict`` indexed by ``trial_index``.
+
+    Args:
+        df: Pandas DataFrame with column ``trial_index``, to be appended with a new
+            column.
+        trials_dict: Dict mapping each ``trial_index`` to a value. The new column of
+            df will be populated with the value corresponding with the
+            ``trial_index`` of each row.
+        column_name: Name of the column to be appended to ``df``.
+        always_include_field_column: Even if all trials have missing values,
+            include the column.
+    """
+
+    if "trial_index" not in df.columns:
+        raise ValueError("df must have trial_index column")
+
+    # field present for some trial
+    if always_include_field_column or any(trials_dict.values()):
+        if not all(
+            v is not None for v in trials_dict.values()
+        ):  # not present for all trials
+            logger.info(
+                f"Column {column_name} missing for some trials. "
+                "Filling with None when missing."
+            )
+        df[column_name] = [trials_dict[trial_index] for trial_index in df.trial_index]
+
+
+def _get_generation_method_str(trial: BaseTrial) -> str:
+    trial_generation_property = trial._properties.get("generation_model_key")
+    if trial_generation_property is not None:
+        return trial_generation_property
+
+    generation_methods = {
+        none_throws(generator_run._model_key)
+        for generator_run in trial.generator_runs
+        if generator_run._model_key is not None
+    }
+
+    # add "Manual" if any generator_runs are manual
+    if any(
+        generator_run.generator_run_type == GeneratorRunType.MANUAL.name
+        for generator_run in trial.generator_runs
+    ):
+        generation_methods.add("Manual")
+    return ", ".join(generation_methods) if generation_methods else "Unknown"
+
+
+def _merge_results_if_no_duplicates(
+    arms_df: pd.DataFrame,
+    results: pd.DataFrame,
+    key_components: list[str],
+    metrics: list[Metric],
+) -> DataFrame:
+    """Formats ``data.df`` and merges it with ``arms_df`` if all of the following are
+    True:
+        - ``data.df`` is not empty
+        - ``data.df`` contains columns corresponding to ``key_components``
+        - after any formatting, ``data.df`` contains no duplicates of the column
+            ``results_key_col``
+    """
+    if len(results.index) == 0:
+        logger.info(
+            f"No results present for the specified metrics `{metrics}`. "
+            "Returning arm parameters and metadata only."
+        )
+        return arms_df
+    if not all(col in results.columns for col in key_components):
+        logger.warning(
+            f"At least one of key columns `{key_components}` not present in results df "
+            f"`{results}`. Returning arm parameters and metadata only."
+        )
+        return arms_df
+    # Prepare results for merge by concatenating the trial index with the arm name,
+    # separated by a comma.
+    key_vals = pd.Series(
+        results[key_components].values.astype("str").tolist()
+    ).str.join(",")
+
+    results_key_col = "-".join(key_components)
+
+    # Reindex so new column isn't set to NaN.
+    key_vals.index = results.index
+    results[results_key_col] = key_vals
+    # Don't return results if duplicates remain
+    if any(results.duplicated(subset=[results_key_col, "metric_name"])):
+        logger.warning(
+            "Experimental results dataframe contains multiple rows with the same "
+            f"keys {results_key_col}. Returning dataframe without results."
+ ) + return arms_df + metric_vals = results.pivot( + index=results_key_col, columns="metric_name", values="mean" + ).reset_index() + + # dedupe results by key_components + metadata_cols = key_components + [results_key_col] + if FEASIBLE_COL_NAME in results.columns: + metadata_cols.append(FEASIBLE_COL_NAME) + metadata = results[metadata_cols].drop_duplicates() + metrics_df = pd.merge(metric_vals, metadata, on=results_key_col) + # drop synthetic key column + metrics_df = metrics_df.drop(results_key_col, axis=1) + # merge and return + return pd.merge(metrics_df, arms_df, on=key_components, how="outer") diff --git a/ax/service/utils/report_utils.py b/ax/service/utils/report_utils.py index b4bf34dd790..c37a7d98fa5 100644 --- a/ax/service/utils/report_utils.py +++ b/ax/service/utils/report_utils.py @@ -14,7 +14,7 @@ from collections.abc import Callable, Iterable from datetime import timedelta from logging import Logger -from typing import Any, cast, TYPE_CHECKING +from typing import cast, TYPE_CHECKING import gpytorch import numpy as np @@ -24,17 +24,13 @@ from ax.core.base_trial import TrialStatus from ax.core.data import Data from ax.core.experiment import Experiment -from ax.core.generator_run import GeneratorRunType from ax.core.map_data import MapData from ax.core.map_metric import MapMetric -from ax.core.metric import Metric -from ax.core.multi_type_experiment import MultiTypeExperiment from ax.core.objective import MultiObjective, ScalarizedObjective from ax.core.optimization_config import OptimizationConfig from ax.core.parameter import Parameter -from ax.core.trial import BaseTrial from ax.early_stopping.strategies.base import BaseEarlyStoppingStrategy -from ax.exceptions.core import DataRequiredError, UserInputError +from ax.exceptions.core import UserInputError from ax.modelbridge import ModelBridge from ax.modelbridge.cross_validation import ( compute_model_fit_metrics_from_modelbridge, @@ -60,12 +56,11 @@ map_data_multiple_metrics_dropdown_plotly, plot_objective_value_vs_trial_index, ) -from ax.service.utils.best_point import _derel_opt_config_wrapper, _is_row_feasible +from ax.service.utils.best_point_utils import exp_to_df, select_baseline_arm from ax.service.utils.early_stopping import get_early_stopping_metrics from ax.utils.common.logger import get_logger from ax.utils.common.typeutils import checked_cast from ax.utils.sensitivity.sobol_measures import ax_parameter_sens -from pandas.core.frame import DataFrame from pyre_extensions import none_throws if TYPE_CHECKING: @@ -648,401 +643,6 @@ def _get_curve_plot_dropdown( ) -def _merge_trials_dict_with_df( - df: pd.DataFrame, - # pyre-fixme[2]: Parameter annotation cannot contain `Any`. - trials_dict: dict[int, Any], - column_name: str, - always_include_field_column: bool = False, -) -> None: - """Add a column ``column_name`` to a DataFrame ``df`` containing a column - ``trial_index``. Each value of the new column is given by the element of - ``trials_dict`` indexed by ``trial_index``. - - Args: - df: Pandas DataFrame with column ``trial_index``, to be appended with a new - column. - trials_dict: Dict mapping each ``trial_index`` to a value. The new column of - df will be populated with the value corresponding with the - ``trial_index`` of each row. - column_name: Name of the column to be appended to ``df``. - always_include_field_column: Even if all trials have missing values, - include the column. 
- """ - - if "trial_index" not in df.columns: - raise ValueError("df must have trial_index column") - - # field present for some trial - if always_include_field_column or any(trials_dict.values()): - if not all( - v is not None for v in trials_dict.values() - ): # not present for all trials - logger.info( - f"Column {column_name} missing for some trials. " - "Filling with None when missing." - ) - df[column_name] = [trials_dict[trial_index] for trial_index in df.trial_index] - - -def _get_generation_method_str(trial: BaseTrial) -> str: - trial_generation_property = trial._properties.get("generation_model_key") - if trial_generation_property is not None: - return trial_generation_property - - generation_methods = { - none_throws(generator_run._model_key) - for generator_run in trial.generator_runs - if generator_run._model_key is not None - } - - # add "Manual" if any generator_runs are manual - if any( - generator_run.generator_run_type == GeneratorRunType.MANUAL.name - for generator_run in trial.generator_runs - ): - generation_methods.add("Manual") - return ", ".join(generation_methods) if generation_methods else "Unknown" - - -def _merge_results_if_no_duplicates( - arms_df: pd.DataFrame, - results: pd.DataFrame, - key_components: list[str], - metrics: list[Metric], -) -> DataFrame: - """Formats ``data.df`` and merges it with ``arms_df`` if all of the following are - True: - - ``data.df`` is not empty - - ``data.df`` contains columns corresponding to ``key_components`` - - after any formatting, ``data.df`` contains no duplicates of the column - ``results_key_col`` - """ - if len(results.index) == 0: - logger.info( - f"No results present for the specified metrics `{metrics}`. " - "Returning arm parameters and metadata only." - ) - return arms_df - if not all(col in results.columns for col in key_components): - logger.warning( - f"At least one of key columns `{key_components}` not present in results df " - f"`{results}`. Returning arm parameters and metadata only." - ) - return arms_df - # prepare results for merge by concattenating the trial index with the arm name - # sparated by a comma - key_vals = pd.Series( - results[key_components].values.astype("str").tolist() - ).str.join(",") - - results_key_col = "-".join(key_components) - - # Reindex so new column isn't set to NaN. - key_vals.index = results.index - results[results_key_col] = key_vals - # Don't return results if duplicates remain - if any(results.duplicated(subset=[results_key_col, "metric_name"])): - logger.warning( - "Experimental results dataframe contains multiple rows with the same " - f"keys {results_key_col}. Returning dataframe without results." - ) - return arms_df - metric_vals = results.pivot( - index=results_key_col, columns="metric_name", values="mean" - ).reset_index() - - # dedupe results by key_components - metadata_cols = key_components + [results_key_col] - if FEASIBLE_COL_NAME in results.columns: - metadata_cols.append(FEASIBLE_COL_NAME) - metadata = results[metadata_cols].drop_duplicates() - metrics_df = pd.merge(metric_vals, metadata, on=results_key_col) - # drop synthetic key column - metrics_df = metrics_df.drop(results_key_col, axis=1) - # merge and return - return pd.merge(metrics_df, arms_df, on=key_components, how="outer") - - -def _get_relative_results( - results_df: pd.DataFrame, status_quo_arm_name: str -) -> pd.DataFrame: - """Returns a dataframe with relative results, i.e. % change in metric values - relative to the status quo arm. 
- """ - baseline_df = results_df[results_df["arm_name"] == status_quo_arm_name] - relative_results_df = pd.merge( - results_df, - baseline_df[["metric_name", "mean"]], - on="metric_name", - suffixes=("", "_baseline"), - ) - relative_results_df["mean"] = ( - 1.0 * relative_results_df["mean"] / relative_results_df["mean_baseline"] - 1.0 - ) * 100.0 - relative_results_df["metric_name"] = relative_results_df["metric_name"] + "_%CH" - return relative_results_df - - -def exp_to_df( - exp: Experiment, - metrics: list[Metric] | None = None, - run_metadata_fields: list[str] | None = None, - trial_properties_fields: list[str] | None = None, - trial_attribute_fields: list[str] | None = None, - additional_fields_callables: None - | (dict[str, Callable[[Experiment], dict[int, str | float]]]) = None, - always_include_field_columns: bool = False, - show_relative_metrics: bool = False, - **kwargs: Any, -) -> pd.DataFrame: - """Transforms an experiment to a DataFrame with rows keyed by trial_index - and arm_name, metrics pivoted into one row. If the pivot results in more than - one row per arm (or one row per ``arm * map_keys`` combination if ``map_keys`` are - present), results are omitted and warning is produced. Only supports - ``Experiment``. - - Transforms an ``Experiment`` into a ``pd.DataFrame``. - - Args: - exp: An ``Experiment`` that may have pending trials. - metrics: Override list of metrics to return. Return all metrics if ``None``. - run_metadata_fields: Fields to extract from ``trial.run_metadata`` for trial - in ``experiment.trials``. If there are multiple arms per trial, these - fields will be replicated across the arms of a trial. - trial_properties_fields: Fields to extract from ``trial._properties`` for trial - in ``experiment.trials``. If there are multiple arms per trial, these - fields will be replicated across the arms of a trial. Output columns names - will be prepended with ``"trial_properties_"``. - trial_attribute_fields: Fields to extract from trial attributes for each trial - in ``experiment.trials``. If there are multiple arms per trial, these - fields will be replicated across the arms of a trial. - additional_fields_callables: A dictionary of field names to callables, with - each being a function from `experiment` to a `trials_dict` of the form - {trial_index: value}. An example of a custom callable like this is the - function `compute_maximum_map_values`. - always_include_field_columns: If `True`, even if all trials have missing - values, include field columns anyway. Such columns are by default - omitted (False). - show_relative_metrics: If `True`, show % metric changes relative to the provided - status quo arm. If no status quo arm is provided, raise a warning and show - raw metric values. If `False`, show raw metric values (default). - Returns: - DataFrame: A dataframe of inputs, metadata and metrics by trial and arm (and - ``map_keys``, if present). If no trials are available, returns an empty - dataframe. If no metric ouputs are available, returns a dataframe of inputs and - metadata. Columns include: - * trial_index - * arm_name - * trial_status - * generation_method - * any elements of exp.runner.run_metadata_report_keys that are present in - the trial.run_metadata of each trial - * one column per metric (named after the metric.name) - * one column per parameter (named after the parameter.name) - """ - - if len(kwargs) > 0: - logger.warning( - "`kwargs` in exp_to_df is deprecated. Please remove extra arguments." 
- ) - - # Accept Experiment and SimpleExperiment - if isinstance(exp, MultiTypeExperiment): - raise ValueError("Cannot transform MultiTypeExperiments to DataFrames.") - - key_components = ["trial_index", "arm_name"] - - # Get each trial-arm with parameters - arms_df = pd.DataFrame( - [ - { - "arm_name": arm.name, - "trial_index": trial_index, - **arm.parameters, - } - for trial_index, trial in exp.trials.items() - for arm in trial.arms - ] - ) - # Fetch results. - data = exp.lookup_data() - results = data.df - - # Filter metrics. - if metrics is not None: - metric_names = [m.name for m in metrics] - results = results[results["metric_name"].isin(metric_names)] - - # Add `FEASIBLE_COL_NAME` column according to constraints if any. - if ( - exp.optimization_config is not None - and len(none_throws(exp.optimization_config).all_constraints) > 0 - ): - optimization_config = none_throws(exp.optimization_config) - try: - if any(oc.relative for oc in optimization_config.all_constraints): - optimization_config = _derel_opt_config_wrapper( - optimization_config=optimization_config, - experiment=exp, - ) - results[FEASIBLE_COL_NAME] = _is_row_feasible( - df=results, - optimization_config=optimization_config, - ) - except (KeyError, ValueError, DataRequiredError) as e: - logger.warning(f"Feasibility calculation failed with error: {e}") - - # Calculate relative metrics if `show_relative_metrics` is True. - if show_relative_metrics: - if exp.status_quo is None: - logger.warning( - "No status quo arm found. Showing raw metric values instead of " - "relative metric values." - ) - else: - status_quo_arm_name = exp.status_quo.name - try: - results = _get_relative_results(results, status_quo_arm_name) - except Exception: - logger.warning( - "Failed to calculate relative metrics. Showing raw metric values " - "instead of relative metric values." - ) - - # If arms_df is empty, return empty results (legacy behavior) - if len(arms_df.index) == 0: - if len(results.index) != 0: - raise ValueError( - "exp.lookup_data().df returned more rows than there are experimental " - "arms. This is an inconsistent experimental state. Please report to " - "Ax support." - ) - return results - - # Create key column from key_components - arms_df["trial_index"] = arms_df["trial_index"].astype(int) - - # Add trial status - trials = exp.trials.items() - trial_to_status = {index: trial.status.name for index, trial in trials} - _merge_trials_dict_with_df( - df=arms_df, trials_dict=trial_to_status, column_name="trial_status" - ) - - # Add trial reason for failed or abandoned trials - trial_to_reason = { - index: ( - f"{trial.failed_reason[:15]}..." - if trial.status.is_failed and trial.failed_reason is not None - else ( - f"{trial.abandoned_reason[:15]}..." - if trial.status.is_abandoned and trial.abandoned_reason is not None - else None - ) - ) - for index, trial in trials - } - - _merge_trials_dict_with_df( - df=arms_df, - trials_dict=trial_to_reason, - column_name="reason", - ) - - # Add generation_method, accounting for the generic case that generator_runs is of - # arbitrary length. Repeated methods within a trial are condensed via `set` and an - # empty set will yield "Unknown" as the method. 
- trial_to_generation_method = { - trial_index: _get_generation_method_str(trial) for trial_index, trial in trials - } - - _merge_trials_dict_with_df( - df=arms_df, - trials_dict=trial_to_generation_method, - column_name="generation_method", - ) - - # Add any trial properties fields to arms_df - if trial_properties_fields is not None: - # add trial._properties fields - for field in trial_properties_fields: - trial_to_properties_field = { - trial_index: ( - trial._properties[field] if field in trial._properties else None - ) - for trial_index, trial in trials - } - _merge_trials_dict_with_df( - df=arms_df, - trials_dict=trial_to_properties_field, - column_name="trial_properties_" + field, - always_include_field_column=always_include_field_columns, - ) - - # Add any run_metadata fields to arms_df - if run_metadata_fields is not None: - # add run_metadata fields - for field in run_metadata_fields: - trial_to_metadata_field = { - trial_index: ( - trial.run_metadata[field] if field in trial.run_metadata else None - ) - for trial_index, trial in trials - } - _merge_trials_dict_with_df( - df=arms_df, - trials_dict=trial_to_metadata_field, - column_name=field, - always_include_field_column=always_include_field_columns, - ) - - # Add any trial attributes fields to arms_df - if trial_attribute_fields is not None: - # add trial attribute fields - for field in trial_attribute_fields: - trial_to_attribute_field = { - trial_index: (getattr(trial, field) if hasattr(trial, field) else None) - for trial_index, trial in trials - } - _merge_trials_dict_with_df( - df=arms_df, - trials_dict=trial_to_attribute_field, - column_name=field, - always_include_field_column=always_include_field_columns, - ) - - # Add additional fields to arms_df - if additional_fields_callables is not None: - for field, func in additional_fields_callables.items(): - trial_to_additional_field = func(exp) - _merge_trials_dict_with_df( - df=arms_df, - trials_dict=trial_to_additional_field, - column_name=field, - always_include_field_column=always_include_field_columns, - ) - - exp_df = _merge_results_if_no_duplicates( - arms_df=arms_df, - results=results, - key_components=key_components, - metrics=metrics or list(exp.metrics.values()), - ) - - exp_df = none_throws(none_throws(exp_df).sort_values(["trial_index"])) - initial_column_order = ( - ["trial_index", "arm_name", "trial_status", "reason", "generation_method"] - + (run_metadata_fields or []) - + (trial_properties_fields or []) - + ([FEASIBLE_COL_NAME] if FEASIBLE_COL_NAME in exp_df.columns else []) - ) - for column_name in reversed(initial_column_order): - if column_name in exp_df.columns: - exp_df.insert(0, column_name, exp_df.pop(column_name)) - return exp_df.reset_index(drop=True) - - def compute_maximum_map_values( experiment: Experiment, map_key: str | None = None ) -> dict[int, float]: @@ -1331,50 +931,6 @@ def _build_result_tuple( return result -def select_baseline_arm( - experiment: Experiment, arms_df: pd.DataFrame, baseline_arm_name: str | None -) -> tuple[str, bool]: - """ - Choose a baseline arm that is found in arms_df - - Returns: - Tuple: - baseline_arm_name if valid baseline exists - true when baseline selected from first arm of sweep - raise ValueError if no valid baseline found - """ - - if baseline_arm_name: - if arms_df[arms_df["arm_name"] == baseline_arm_name].empty: - raise ValueError( - f"compare_to_baseline: baseline row: {baseline_arm_name=}" - " not found in arms" - ) - return baseline_arm_name, False - - else: - if ( - experiment.status_quo - and not 
arms_df[
-                arms_df["arm_name"] == none_throws(experiment.status_quo).name
-            ].empty
-        ):
-            baseline_arm_name = none_throws(experiment.status_quo).name
-            return baseline_arm_name, False
-
-        if (
-            experiment.trials
-            and experiment.trials[0].arms
-            and not arms_df[
-                arms_df["arm_name"] == experiment.trials[0].arms[0].name
-            ].empty
-        ):
-            baseline_arm_name = experiment.trials[0].arms[0].name
-            return baseline_arm_name, True
-        else:
-            raise ValueError("compare_to_baseline: could not find valid baseline arm")
-
-
 def maybe_extract_baseline_comparison_values(
     experiment: Experiment,
     optimization_config: OptimizationConfig | None,
diff --git a/sphinx/source/service.rst b/sphinx/source/service.rst
index e66454a97bb..81cde832d91 100644
--- a/sphinx/source/service.rst
+++ b/sphinx/source/service.rst
@@ -67,6 +67,11 @@ Best Point Identification
     :show-inheritance:
 
+.. automodule:: ax.service.utils.best_point_utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 .. automodule:: ax.service.utils.best_point
     :members:
     :undoc-members:
     :show-inheritance:
diff --git a/tutorials/external_generation_node.ipynb b/tutorials/external_generation_node.ipynb
index c1aac0b259d..44d200ed291 100644
--- a/tutorials/external_generation_node.ipynb
+++ b/tutorials/external_generation_node.ipynb
@@ -55,7 +55,7 @@
 "from ax.modelbridge.transition_criterion import MaxTrials\n",
 "from ax.plot.trace import plot_objective_value_vs_trial_index\n",
 "from ax.service.ax_client import AxClient, ObjectiveProperties\n",
-"from ax.service.utils.report_utils import exp_to_df\n",
+"from ax.service.utils.best_point_utils import exp_to_df\n",
 "from ax.utils.common.typeutils import checked_cast\n",
 "from ax.utils.measurement.synthetic_functions import hartmann6\n",
 "from sklearn.ensemble import RandomForestRegressor\n",
diff --git a/tutorials/modular_botax.ipynb b/tutorials/modular_botax.ipynb
index dc8f72147ec..7161a9be70b 100644
--- a/tutorials/modular_botax.ipynb
+++ b/tutorials/modular_botax.ipynb
@@ -53,7 +53,7 @@
 "from ax.models.torch.botorch_modular.utils import ModelConfig\n",
 "\n",
 "# Experiment examination utilities\n",
-"from ax.service.utils.report_utils import exp_to_df\n",
+"from ax.service.utils.best_point_utils import exp_to_df\n",
 "\n",
 "# Test Ax objects\n",
 "from ax.utils.testing.core_stubs import (\n",
diff --git a/tutorials/multiobjective_optimization.ipynb b/tutorials/multiobjective_optimization.ipynb
index c52e040f982..f14ec03bce4 100644
--- a/tutorials/multiobjective_optimization.ipynb
+++ b/tutorials/multiobjective_optimization.ipynb
@@ -365,7 +365,7 @@
 "from ax.modelbridge.modelbridge_utils import observed_hypervolume\n",
 "from ax.modelbridge.registry import Models\n",
 "from ax.runners.synthetic import SyntheticRunner\n",
-"from ax.service.utils.report_utils import exp_to_df\n",
+"from ax.service.utils.best_point_utils import exp_to_df\n",
 "\n",
 "# BoTorch acquisition class for ParEGO\n",
 "from botorch.acquisition.multi_objective.parego import qLogNParEGO"
diff --git a/tutorials/saasbo_nehvi.ipynb b/tutorials/saasbo_nehvi.ipynb
index d49edb5f7b3..fa094e162be 100644
--- a/tutorials/saasbo_nehvi.ipynb
+++ b/tutorials/saasbo_nehvi.ipynb
@@ -159,7 +159,7 @@
 "from ax.plot.pareto_frontier import plot_pareto_frontier\n",
 "from ax.plot.pareto_utils import compute_posterior_pareto_frontier\n",
 "from ax.runners.synthetic import SyntheticRunner\n",
-"from ax.service.utils.report_utils import exp_to_df\n",
+"from ax.service.utils.best_point_utils import exp_to_df\n",
 "\n",
 "# Plotting imports and initialization\n",
 "from 
ax.utils.notebook.plotting import init_notebook_plotting, render\n", diff --git a/tutorials/scheduler.ipynb b/tutorials/scheduler.ipynb index 6ca9dc5cae1..9e0eb486e0d 100644 --- a/tutorials/scheduler.ipynb +++ b/tutorials/scheduler.ipynb @@ -506,7 +506,7 @@ }, "outputs": [], "source": [ - "from ax.service.utils.report_utils import exp_to_df\n", + "from ax.service.utils.best_point_utils import exp_to_df\n", "\n", "exp_to_df(experiment)" ] diff --git a/tutorials/submitit.ipynb b/tutorials/submitit.ipynb index 155f1c57cd5..6bb0c3682b4 100644 --- a/tutorials/submitit.ipynb +++ b/tutorials/submitit.ipynb @@ -25,7 +25,7 @@ "import time\n", "from ax.service.ax_client import AxClient, ObjectiveProperties\n", "from ax.utils.notebook.plotting import render\n", - "from ax.service.utils.report_utils import exp_to_df\n", + "from ax.service.utils.best_point_utils import exp_to_df\n", "from submitit import AutoExecutor, LocalJob, DebugJob" ] },
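Reviewer note, not part of the patch: a minimal usage sketch of the relocated APIs, in the spirit of the tests in this diff. It assumes a pre-built single-objective experiment and generation strategy; `branin_experiment`, `sobol_gs`, and `total_trials=8` are placeholders rather than values taken from the diff, and the experiment is assumed to already contain trial data before the improvement query.

# Illustrative sketch only -- `branin_experiment` and `sobol_gs` are hypothetical
# placeholders for a single-objective Experiment (with runner and metrics) and a
# GenerationStrategy.
from ax.service.scheduler import Scheduler
from ax.service.utils.best_point_utils import exp_to_df
from ax.service.utils.scheduler_options import SchedulerOptions

scheduler = Scheduler(
    experiment=branin_experiment,
    generation_strategy=sobol_gs,
    options=SchedulerOptions(total_trials=8),
)

# ... trials are assumed to have been run so the experiment has attached data ...

# After this patch the method lives on BestPointMixin and receives the experiment
# and generation strategy explicitly instead of reading them from `self`.
improvement = scheduler.get_improvement_over_baseline(
    experiment=scheduler.experiment,
    generation_strategy=scheduler.standard_generation_strategy,
    baseline_arm_name=None,  # None falls back to select_baseline_arm (status quo, then first arm)
)

# `exp_to_df` is now imported from the new best_point_utils module, not report_utils.
print(exp_to_df(scheduler.experiment))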