From 7edb72ee2bd6088cf5595b3ac915d0d5f258ca0e Mon Sep 17 00:00:00 2001 From: "Yngve S. Kristiansen" Date: Fri, 13 Dec 2024 15:05:02 +0100 Subject: [PATCH] Make work with export --- src/ert/run_models/everest_run_model.py | 6 +- src/everest/api/everest_data_api.py | 227 +++++++++--------------- src/everest/everest_storage.py | 194 +++++++++++++------- src/everest/export.py | 208 ++++++++++++---------- tests/everest/test_api_snapshots.py | 11 +- tests/everest/test_export.py | 22 +-- 6 files changed, 347 insertions(+), 321 deletions(-) diff --git a/src/ert/run_models/everest_run_model.py b/src/ert/run_models/everest_run_model.py index 7a96867769c..3d5848d6154 100644 --- a/src/ert/run_models/everest_run_model.py +++ b/src/ert/run_models/everest_run_model.py @@ -21,13 +21,13 @@ ) import numpy as np -from seba_sqlite import SqliteStorage, sqlite_storage from numpy import float64 from numpy._typing import NDArray from ropt.enums import EventType, OptimizerExitCode from ropt.evaluator import EvaluatorContext, EvaluatorResult from ropt.plan import BasicOptimizer from ropt.plan import Event as OptimizerEvent +from seba_sqlite import SqliteStorage, sqlite_storage from typing_extensions import TypedDict from _ert.events import EESnapshot, EESnapshotUpdate, Event @@ -130,7 +130,7 @@ def __call__(self) -> str | None: ... @dataclass -class OptimalResult: +class OptimalResult: # noqa batch: int controls: list[Any] total_objective: float @@ -297,7 +297,7 @@ def run_experiment( # Seems ROPT batches are 1-indexed now, # whereas seba has its own 0-indexed counter. - assert self._result == optimal_result_from_everstorage + assert self._result.__dict__ == optimal_result_from_everstorage.__dict__ self._exit_code = ( "max_batch_num_reached" diff --git a/src/everest/api/everest_data_api.py b/src/everest/api/everest_data_api.py index 2facfc1d9b5..3788c654d8b 100644 --- a/src/everest/api/everest_data_api.py +++ b/src/everest/api/everest_data_api.py @@ -1,8 +1,9 @@ -from collections import OrderedDict from pathlib import Path +import polars import polars as pl -from seba_sqlite.snapshot import SebaSnapshot +from ropt.enums import ConstraintType +from seba_sqlite import SebaSnapshot from ert.storage import open_storage from everest.config import EverestConfig, ServerConfig @@ -20,42 +21,29 @@ def __init__(self, config: EverestConfig, filter_out_gradient=True): @property def batches(self): - batch_ids = list({opt.batch_id for opt in self._snapshot.optimization_data}) - batch_ids2 = sorted( + return sorted( b.batch_id for b in self._ever_storage.data.batches if b.batch_objectives is not None ) - assert batch_ids == batch_ids2 - return sorted(batch_ids) @property def accepted_batches(self): - batch_ids = list( - {opt.batch_id for opt in self._snapshot.optimization_data if opt.merit_flag} - ) - batch_ids2 = sorted( + return sorted( b.batch_id for b in self._ever_storage.data.batches if b.is_improvement ) - assert batch_ids == batch_ids2 - - return sorted(batch_ids) @property def objective_function_names(self): - original = [fnc.name for fnc in self._snapshot.metadata.objectives.values()] - new = sorted( + return sorted( self._ever_storage.data.objective_functions["objective_name"] .unique() .to_list() ) - assert original == new - return original @property def output_constraint_names(self): - original = [fnc.name for fnc in self._snapshot.metadata.constraints.values()] - new = ( + return ( sorted( self._ever_storage.data.nonlinear_constraints["constraint_name"] .unique() @@ -64,28 +52,16 @@ def output_constraint_names(self): if self._ever_storage.data.nonlinear_constraints is not None else [] ) - assert original == new - return original def input_constraint(self, control): - controls = [ - con - for con in self._snapshot.metadata.controls.values() - if con.name == control - ] - - original = {"min": controls[0].min_value, "max": controls[0].max_value} - initial_values = self._ever_storage.data.initial_values control_spec = initial_values.filter( pl.col("control_name") == control ).to_dicts()[0] - new = { + return { "min": control_spec.get("lower_bounds"), "max": control_spec.get("upper_bounds"), } - assert new == original - return original def output_constraint(self, constraint): """ @@ -95,146 +71,122 @@ def output_constraint(self, constraint): "right_hand_side" is a constant real number that indicates the constraint bound/target. """ - constraints = [ - con - for con in self._snapshot.metadata.constraints.values() - if con.name == constraint - ] - - old = { - "type": constraints[0].constraint_type, - "right_hand_side": constraints[0].rhs_value, - } constraint_dict = self._ever_storage.data.nonlinear_constraints.to_dicts()[0] - new = { - "type": constraint_dict["constraint_type"], - "right_hand_side": constraint_dict["rhs_value"], + return { + "type": ConstraintType(constraint_dict["constraint_type"]).name.lower(), + "right_hand_side": constraint_dict["constraint_rhs_value"], } - assert old == new - return new - @property def realizations(self): - old = list( - OrderedDict.fromkeys( - int(sim.realization) for sim in self._snapshot.simulation_data - ) - ) - new = sorted( + return sorted( self._ever_storage.data.batches[0] .realization_objectives["realization"] .unique() .to_list() ) - assert old == new - return new @property def simulations(self): - old = list( - OrderedDict.fromkeys( - [int(sim.simulation) for sim in self._snapshot.simulation_data] - ) - ) - - new = sorted( + return sorted( self._ever_storage.data.batches[0] - .realization_objectives["result_id"] + .realization_objectives["simulation_id"] .unique() .to_list() ) - assert old == new - return new @property def control_names(self): - old = [con.name for con in self._snapshot.metadata.controls.values()] - new = sorted( + return sorted( self._ever_storage.data.initial_values["control_name"].unique().to_list() ) - assert old == new - return new @property def control_values(self): - controls = [con.name for con in self._snapshot.metadata.controls.values()] - return [ - {"control": con, "batch": sim.batch, "value": sim.controls[con]} - for sim in self._snapshot.simulation_data - for con in controls - if con in sim.controls - ] + all_control_names = self._ever_storage.data.initial_values[ + "control_name" + ].to_list() + new = [] + for batch in self._ever_storage.data.batches: + if batch.realization_controls is None: + continue + + for controls_dict in batch.realization_controls.to_dicts(): + for name in all_control_names: + new.append( + { + "control": name, + "batch": batch.batch_id, + "value": controls_dict[name], + } + ) + + return new @property def objective_values(self): - old = [ - { - "function": objective.name, - "batch": sim.batch, - "realization": sim.realization, - "simulation": sim.simulation, - "value": sim.objectives[objective.name], - "weight": objective.weight, - "norm": objective.normalization, - } - for sim in self._snapshot.simulation_data - for objective in self._snapshot.metadata.objectives.values() - if objective.name in sim.objectives - ] - - new = [ + return [ b for b in self._ever_storage.data.batches if b.batch_objectives is not None ] - assert old == new - - return old - @property def single_objective_values(self): - single_obj = [ - { - "batch": optimization_el.batch_id, - "objective": optimization_el.objective_value, - "accepted": optimization_el.merit_flag, - } - for optimization_el in self._snapshot.optimization_data - ] - metadata = { - func.name: {"weight": func.weight, "norm": func.normalization} - for func in self._snapshot.metadata.functions.values() - if func.function_type == func.FUNCTION_OBJECTIVE_TYPE - } - if len(metadata) == 1: - return single_obj - objectives = [] - for name, values in self._snapshot.expected_objectives.items(): - for idx, val in enumerate(values): - factor = metadata[name]["weight"] * metadata[name]["norm"] - if len(objectives) > idx: - objectives[idx].update({name: val * factor}) - else: - objectives.append({name: val * factor}) - for idx, obj in enumerate(single_obj): - obj.update(objectives[idx]) + batch_datas = polars.concat( + [ + b.batch_objectives.select( + c for c in b.batch_objectives.columns if c != "merit_value" + ).with_columns( + polars.lit(1 if b.is_improvement else 0).alias("accepted") + ) + for b in self._ever_storage.data.batches + if b.realization_controls is not None + ] + ) + objectives = self._ever_storage.data.objective_functions - return single_obj + for o in objectives.to_dicts(): + batch_datas = batch_datas.with_columns( + polars.col(o["objective_name"]) * o["weight"] * o["normalization"] + ) + + return ( + batch_datas.rename( + {"total_objective_value": "objective", "batch_id": "batch"} + ) + .select("batch", "objective", "accepted") + .to_dicts() + ) @property def gradient_values(self): - return [ - { - "batch": optimization_el.batch_id, - "function": function, - "control": control, - "value": value, - } - for optimization_el in self._snapshot.optimization_data - for function, info in optimization_el.gradient_info.items() - for control, value in info.items() + all_batch_data = [ + b.batch_objective_gradient + for b in self._ever_storage.data.batches + if b.batch_objective_gradient is not None + ] + if not all_batch_data: + return [] + + all_info = polars.concat(all_batch_data).drop("result_id") + objective_columns = [ + c + for c in all_info.drop(["batch_id", "control_name"]).columns + if not c.endswith(".total") ] + return ( + all_info.select("batch_id", "control_name", *objective_columns) + .unpivot( + on=objective_columns, + index=["batch_id", "control_name"], + variable_name="function", + value_name="value", + ) + .rename({"control_name": "control", "batch_id": "batch"}) + .sort(by=["batch", "control"]) + .select(["batch", "function", "control", "value"]) + .to_dicts() + ) def summary_values(self, batches=None, keys=None): if batches is None: @@ -265,13 +217,8 @@ def summary_values(self, batches=None, keys=None): summary = summary.with_columns( pl.Series("batch", [batch_id] * summary.shape[0]) ) - # The realization ID as defined by Everest must be - # retrieved via the seba snapshot. - realization_map = { - sim.simulation: sim.realization - for sim in self._snapshot.simulation_data - if sim.batch == batch_id - } + + realization_map = self._ever_storage.data.simulation_to_realization_map realizations = pl.Series( "realization", [realization_map.get(str(sim)) for sim in summary["simulation"]], diff --git a/src/everest/everest_storage.py b/src/everest/everest_storage.py index 5d3b30d46b4..efe7a00dbd5 100644 --- a/src/everest/everest_storage.py +++ b/src/everest/everest_storage.py @@ -52,7 +52,7 @@ def try_read_df(path: Path) -> polars.DataFrame | None: @dataclass class BatchDataFrames: batch_id: int - batch_controls: polars.DataFrame + realization_controls: polars.DataFrame batch_objectives: polars.DataFrame | None realization_objectives: polars.DataFrame | None batch_constraints: polars.DataFrame | None @@ -73,6 +73,9 @@ def existing_dataframes(self) -> dict[str, polars.DataFrame]: if self.realization_objectives is not None: dataframes["realization_objectives"] = self.realization_objectives + if self.realization_controls is not None: + dataframes["realization_controls"] = self.realization_controls + if self.batch_constraints is not None: dataframes["batch_constraints"] = self.batch_constraints @@ -103,6 +106,27 @@ class EverestStorageDataFrames: nonlinear_constraints: polars.DataFrame | None = None realization_weights: polars.DataFrame | None = None + @property + def simulation_to_realization_map(self): + dummy_df = next( + ( + b.realization_controls + for b in self.batches + if b.realization_controls is not None + ), + None, + ) + + if dummy_df is None: + return {} + + mapping = {} + for d in dummy_df.select("realization", "simulation_id").to_dicts(): + # Currently we work with str, but should maybe not be done in future + mapping[str(d["simulation_id"])] = str(d["realization"]) + + return mapping + def write_to_experiment( self, experiment: _OptimizerOnlyExperiment, write_csv=False ): @@ -250,8 +274,8 @@ def read_from_experiment(self, experiment: _OptimizerOnlyExperiment) -> None: ens.optimizer_mount_point / "perturbation_constraints.parquet" ) - batch_controls = try_read_df( - ens.optimizer_mount_point / "batch_controls.parquet" + realization_controls = try_read_df( + ens.optimizer_mount_point / "realization_controls.parquet" ) with open(ens.optimizer_mount_point / "batch.json", encoding="utf-8") as f: @@ -262,7 +286,7 @@ def read_from_experiment(self, experiment: _OptimizerOnlyExperiment) -> None: self.batches.append( BatchDataFrames( batch_id, - batch_controls, + realization_controls, batch_objectives, realization_objectives, batch_constraints, @@ -321,7 +345,7 @@ def get_ensemble_by_name(self, name: str) -> _OptimizerOnlyEnsemble: @dataclass class _EvaluationResults: - batch_controls: polars.DataFrame + realization_controls: polars.DataFrame batch_objectives: polars.DataFrame realization_objectives: polars.DataFrame batch_constraints: polars.DataFrame | None @@ -411,13 +435,13 @@ def _initialize(self, event): self._convert_names(config.variables.names), dtype=polars.String ), "initial_value": polars.Series( - config.variables.initial_values, dtype=polars.Float32 + config.variables.initial_values, dtype=polars.Float64 ), "lower_bounds": polars.Series( - config.variables.lower_bounds, dtype=polars.Float32 + config.variables.lower_bounds, dtype=polars.Float64 ), "upper_bounds": polars.Series( - config.variables.upper_bounds, dtype=polars.Float32 + config.variables.upper_bounds, dtype=polars.Float64 ), } ) @@ -426,11 +450,11 @@ def _initialize(self, event): { "objective_name": config.objectives.names, "weight": polars.Series( - config.objectives.weights, dtype=polars.Float32 + config.objectives.weights, dtype=polars.Float64 ), "normalization": polars.Series( [1.0 / s for s in config.objectives.scales], - dtype=polars.Float32, + dtype=polars.Float64, ), } ) @@ -439,7 +463,9 @@ def _initialize(self, event): self.data.nonlinear_constraints = polars.DataFrame( { "constraint_name": config.nonlinear_constraints.names, - "normalization": config.nonlinear_constraints.scales, + "normalization": [ + 1.0 / s for s in config.nonlinear_constraints.scales + ], # Q: Is this correct? "constraint_rhs_value": config.nonlinear_constraints.rhs_values, "constraint_type": config.nonlinear_constraints.types, } @@ -451,7 +477,7 @@ def _initialize(self, event): config.realizations.names, dtype=polars.UInt16 ), "weight": polars.Series( - config.realizations.weights, dtype=polars.Float32 + config.realizations.weights, dtype=polars.Float64 ), } ) @@ -462,7 +488,7 @@ def _store_function_results(self, results: FunctionResults) -> _EvaluationResult realization_objectives = polars.from_pandas( results.to_dataframe( "evaluations", - select=["variables", "objectives", "constraints", "evaluation_ids"], + select=["objectives", "constraints", "evaluation_ids"], ).reset_index(), ).drop("plan_id") batch_objectives = polars.from_pandas( @@ -472,27 +498,21 @@ def _store_function_results(self, results: FunctionResults) -> _EvaluationResult ).reset_index() ).drop("plan_id") - batch_controls = polars.from_pandas( - results.to_dataframe("evaluations", select=["variables"]).reset_index() + realization_controls = polars.from_pandas( + results.to_dataframe( + "evaluations", select=["variables", "evaluation_ids"] + ).reset_index() ).drop("plan_id") - batch_controls = self._rename_columns(batch_controls) - control_names = batch_controls["control_name"].unique().to_list() + realization_controls = self._rename_columns(realization_controls) + realization_controls = self._enforce_dtypes(realization_controls) - has_scaled_controls = "scaled_control_value" in batch_controls - batch_controls = batch_controls.pivot( + realization_controls = realization_controls.pivot( on="control_name", values=["control_value"], # , "scaled_control_value"] separator=":", ) - if has_scaled_controls: - batch_controls = batch_controls.rename( - { - **{f"control_value:{name}": name for name in control_names}, - } - ) - try: batch_constraints = polars.from_pandas( results.to_dataframe("nonlinear_constraints").reset_index() @@ -503,7 +523,10 @@ def _store_function_results(self, results: FunctionResults) -> _EvaluationResult realization_constraints = None batch_objectives = self._rename_columns(batch_objectives) + batch_objectives = self._enforce_dtypes(batch_objectives) + realization_objectives = self._rename_columns(realization_objectives) + realization_objectives = self._enforce_dtypes(realization_objectives) batch_objectives = batch_objectives.pivot( on="objective_name", @@ -544,35 +567,33 @@ def _store_function_results(self, results: FunctionResults) -> _EvaluationResult "result_id", "batch_id", "realization", + "simulation_id", "constraint_name", "constraint_value", - ].unique(["result_id", "batch_id", "realization", "constraint_name"]) + ] realization_constraints = realization_constraints.pivot( values=["constraint_value"], on="constraint_name" ) realization_objectives = realization_objectives.drop( [c for c in realization_objectives.columns if "constraint" in c.lower()] - ).unique(subset=["result_id", "batch_id", "realization", "control_name"]) + ) batch_objectives = batch_objectives.drop( [c for c in batch_objectives.columns if "constraint" in c.lower()] - ).unique(subset=["result_id", "batch_id"]) - - realization_objectives = ( - realization_objectives.drop(["control_name", "control_value"]) - .unique(subset=["result_id", "batch_id", "realization", "objective_name"]) - .pivot( - values="objective_value", - index=[ - "result_id", - "batch_id", - "realization", - ], - columns="objective_name", ) + + realization_objectives = realization_objectives.pivot( + values="objective_value", + index=[ + "result_id", + "batch_id", + "realization", + "simulation_id", + ], + columns="objective_name", ) return _EvaluationResults( - batch_controls, + realization_controls, batch_objectives, realization_objectives, batch_constraints, @@ -585,7 +606,7 @@ def _rename_columns(df: polars.DataFrame): if len(scaled_cols) > 0: raise ValueError("Don't store scaled columns") - _renames = { + renames = { "objective": "objective_name", "weighted_objective": "total_objective_value", "variable": "control_name", @@ -601,8 +622,51 @@ def _rename_columns(df: polars.DataFrame): "scaled_perturbed_objectives": "scaled_perturbed_objective_value", "scaled_perturbed_constraints": "scaled_perturbed_constraint_value", "scaled_variables": "scaled_control_value", + "evaluation_ids": "simulation_id", } - return df.rename({k: v for k, v in _renames.items() if k in df.columns}) + return df.rename({k: v for k, v in renames.items() if k in df.columns}) + + @staticmethod + def _enforce_dtypes(df: polars.DataFrame): + dtypes = { + "batch_id": polars.UInt16, + "result_id": polars.UInt16, + "perturbation": polars.UInt16, + "realization": polars.UInt16, + "simulation_id": polars.UInt16, + "objective_name": polars.String, + "control_name": polars.String, + "constraint_name": polars.String, + "total_objective_value": polars.Float64, + "control_value": polars.Float64, + "objective_value": polars.Float64, + "constraint_value": polars.Float64, + "scaled_constraint_value": polars.Float64, + "scaled_objective_value": polars.Float64, + "perturbed_control_value": polars.Float64, + "perturbed_objective_value": polars.Float64, + "perturbed_constraint_value": polars.Float64, + "scaled_perturbed_objective_value": polars.Float64, + "scaled_perturbed_constraint_value": polars.Float64, + "scaled_control_value": polars.Float64, + } + + existing_cols = set(df.columns) + unaccounted_cols = existing_cols - set(dtypes) + if len(unaccounted_cols) > 0: + raise KeyError( + f"Expected all keys to have a specified dtype, found {unaccounted_cols}" + ) + + df = df.cast( + { + colname: dtype + for colname, dtype in dtypes.items() + if colname in df.columns + } + ) + + return df def _store_gradient_results(self, results: FunctionResults) -> _GradientResults: perturbation_objectives = polars.from_pandas( @@ -628,8 +692,10 @@ def _store_gradient_results(self, results: FunctionResults) -> _GradientResults: if c.lower().startswith("scaled") ) batch_objective_gradient = self._rename_columns(batch_objective_gradient) + batch_objective_gradient = self._enforce_dtypes(batch_objective_gradient) perturbation_objectives = self._rename_columns(perturbation_objectives) + perturbation_objectives = self._rename_columns(perturbation_objectives) if "constraint_name" in perturbation_objectives: perturbation_constraints = ( @@ -761,41 +827,43 @@ def _handle_finished_batch_event(self, event: Event): # +-----------------------------------------------------------------+ last_batch = -1 - _batches = {} + batches = {} for item in results: - if item.batch_id not in _batches: - _batches[item.batch_id] = {} + if item.batch_id not in batches: + batches[item.batch_id] = {} if isinstance(item, FunctionResults): eval_results = self._store_function_results(item) - _batches[item.batch_id]["batch_controls"] = eval_results.batch_controls - _batches[item.batch_id]["batch_objectives"] = ( + batches[item.batch_id]["realization_controls"] = ( + eval_results.realization_controls + ) + batches[item.batch_id]["batch_objectives"] = ( eval_results.batch_objectives ) - _batches[item.batch_id]["realization_objectives"] = ( + batches[item.batch_id]["realization_objectives"] = ( eval_results.realization_objectives ) - _batches[item.batch_id]["batch_constraints"] = ( + batches[item.batch_id]["batch_constraints"] = ( eval_results.batch_constraints ) - _batches[item.batch_id]["realization_constraints"] = ( + batches[item.batch_id]["realization_constraints"] = ( eval_results.realization_constraints ) if isinstance(item, GradientResults): gradient_results = self._store_gradient_results(item) - _batches[item.batch_id]["batch_objective_gradient"] = ( + batches[item.batch_id]["batch_objective_gradient"] = ( gradient_results.batch_objective_gradient ) - _batches[item.batch_id]["perturbation_objectives"] = ( + batches[item.batch_id]["perturbation_objectives"] = ( gradient_results.perturbation_objectives ) - _batches[item.batch_id]["batch_constraint_gradient"] = ( + batches[item.batch_id]["batch_constraint_gradient"] = ( gradient_results.batch_constraint_gradient ) - _batches[item.batch_id]["perturbation_constraints"] = ( + batches[item.batch_id]["perturbation_constraints"] = ( gradient_results.perturbation_constraints ) @@ -805,11 +873,11 @@ def _handle_finished_batch_event(self, event: Event): # self._database.set_batch_ended last_batch = item.batch_id - for batch_id, info in _batches.items(): + for batch_id, info in batches.items(): self.data.batches.append( BatchDataFrames( batch_id=batch_id, - batch_controls=info.get("batch_controls"), + realization_controls=info.get("realization_controls"), batch_objectives=info.get("batch_objectives"), realization_objectives=info.get("realization_objectives"), batch_constraints=info.get("batch_constraints"), @@ -870,20 +938,22 @@ def find_best_batch(filter_by, sort_by): return None matching_batches.sort(key=sort_by) - _batch = matching_batches[0] - _controls_dict = _batch.batch_controls.drop( + batch = matching_batches[0] + controls_dict = batch.realization_controls.drop( [ "result_id", "batch_id", + "simulation_id", + "realization", *[ c - for c in _batch.batch_controls.columns + for c in batch.realization_controls.columns if c.endswith(".scaled") # don't need scaled control values ], ] ).to_dicts()[0] - return _batch, _controls_dict + return batch, controls_dict if has_merit: # Minimize merit diff --git a/src/everest/export.py b/src/everest/export.py index dd503328099..f421648bd0f 100644 --- a/src/everest/export.py +++ b/src/everest/export.py @@ -2,12 +2,10 @@ import re from enum import StrEnum from pathlib import Path -from typing import Any import pandas as pd import polars from pandas import DataFrame -from seba_sqlite.snapshot import SebaSnapshot from ert.storage import open_storage from everest.config import ExportConfig @@ -61,10 +59,8 @@ def filter_data(data: DataFrame, keyword_filters: set[str]): def available_batches(optimization_output_dir: str) -> set[int]: - snapshot = SebaSnapshot(optimization_output_dir).get_snapshot( - filter_out_gradient=False, batches=None - ) - return {data.batch for data in snapshot.simulation_data} + storage = EverestStorage(Path(optimization_output_dir)) + return {b.batch_id for b in storage.data.batches} def export_metadata(config: ExportConfig | None, optimization_output_dir: str): @@ -86,101 +82,131 @@ def export_metadata(config: ExportConfig | None, optimization_output_dir: str): discard_gradient = False batches = config.batches - snapshot = SebaSnapshot(optimization_output_dir).get_snapshot( - filter_out_gradient=discard_gradient, - batches=batches, - ) storage = EverestStorage(Path(optimization_output_dir)) storage.read_from_output_dir() - opt_data = snapshot.optimization_data_by_batch metadata = [] + for batch_info in (b for b in storage.data.batches): + if discard_rejected and not batch_info.is_improvement: + continue - for data in snapshot.simulation_data: - # If export section not defined in the config file export only increased - # merit non-gradient simulation results - if ( - discard_rejected - and data.batch in opt_data - and opt_data[data.batch].merit_flag != 1 - ): + if batches is not None and batch_info.batch_id not in batches: continue - md_row: dict[str, Any] = { - MetaDataColumnNames.BATCH: data.batch, - MetaDataColumnNames.SIM_AVERAGED_OBJECTIVE: data.sim_avg_obj, - MetaDataColumnNames.IS_GRADIENT: data.is_gradient, - MetaDataColumnNames.REALIZATION: int(data.realization), - MetaDataColumnNames.START_TIME: data.start_time, - MetaDataColumnNames.END_TIME: data.end_time, - MetaDataColumnNames.SUCCESS: data.success, - MetaDataColumnNames.REALIZATION_WEIGHT: data.realization_weight, - MetaDataColumnNames.SIMULATION: int(data.simulation), - } - if data.objectives: - md_row.update(data.objectives) - if data.constraints: - md_row.update(data.constraints) - if data.controls: - md_row.update(data.controls) - - if not md_row[MetaDataColumnNames.IS_GRADIENT]: - if md_row[MetaDataColumnNames.BATCH] in opt_data: - opt = opt_data[md_row[MetaDataColumnNames.BATCH]] - md_row.update( - { - MetaDataColumnNames.REAL_AVERAGED_OBJECTIVE: opt.objective_value, - MetaDataColumnNames.INCREASED_MERIT: opt.merit_flag, - } + all_control_names = storage.data.initial_values["control_name"].to_list() + all_objective_names = storage.data.objective_functions[ + "objective_name" + ].to_list() + # all_constraint_names = storage.data.nonlinear_constraints[ + # "constraint_name" + # ].to_list() + + realization_info = batch_info.realization_objectives + + if batch_info.realization_constraints is not None: + realization_info = realization_info.join( + batch_info.realization_constraints, + on=["result_id", "batch_id", "realization", "simulation_id"], + ) + + realization_info = realization_info.join( + batch_info.realization_controls, + on=["result_id", "batch_id", "realization", "simulation_id"], + ) + for real_tuple, data in realization_info.group_by("realization"): + realization = real_tuple[0] + + objectives_dict = {} + objectives_gradient_dict = {} + for objective in storage.data.objective_functions.to_dicts(): + weight = objective["weight"] + normalization = objective["normalization"] + objective_name = objective["objective_name"] + objective_value = data[objective_name].item() + + objectives_dict[objective_name] = objective_value + objectives_dict[f"{objective_name}_norm"] = ( + objective_value * normalization ) - for function, gradients in opt.gradient_info.items(): - for control, gradient_value in gradients.items(): - md_row.update( - {f"gradient-{function}-{control}": gradient_value} - ) - else: - print( - f"Batch {md_row[MetaDataColumnNames.BATCH]} has no available optimization data" + objectives_dict[f"{objective_name}_weighted_norm"] = ( + objective_value * weight * normalization ) - metadata.append(md_row) - - # Contains information about the simulations: - # batch -> the batch id - # objectives -> Dictionary mapping the objective function names to the - # objective values per simulation also contains mapping - # of the normalized and weighted normalized objective values - # constraints -> Dictionary mapping the constraint function names to the - # constraint values per simulation also contains mapping of - # the normalized and weighted normalized constraint values - # controls -> Dictionary mapping the control names to their values. - # Controls generating the simulation results - # sim_avg_obj -> The value of the objective function for the simulation - # is_gradient -> Flag describing if the simulation is a gradient or non - # gradient simulation - # realization -> The name of the realization the simulation is part of - # start_time -> The starting timpestamp for the simulation - # end_time -> The end timpstamp for the simulation - # success -> Flag describing if the simulation was successful or not (1 or 0) - # realization_weight -> The weight of the realization the simulation was part of. - # simulation -> The simulation number used in libres - - # WIP! - metadata2 = [] - for i, batch_info in enumerate(storage.data.batches): - if discard_rejected and not batch_info.is_improvement: - continue - corresponding = metadata[i] - print("Yo") - md_row2: Dict[str, Any] = { - MetaDataColumnNames.BATCH: batch_info.batch_id, - MetaDataColumnNames.SIM_AVERAGED_OBJECTIVE: batch_info.batch_objectives.select( - polars.mean("total_objective_value") - ).item(), - MetaDataColumnNames.REALIZATION: None, - } - metadata2.append(md_row2) - assert corresponding is not None + if not discard_gradient and batch_info.batch_objective_gradient is not None: + for objective_name in all_objective_names: + for d in batch_info.batch_objective_gradient.select( + "control_name", objective_name + ).to_dicts(): + objectives_gradient_dict[ + f"gradient-{objective_name}-{d['control_name']}" + ] = d[objective_name] + + # Q: Seems to not be exported, why? + # constraints_gradient_dict = {} + # if batch_info.batch_constraint_gradient is not None: + # for constraint_name in all_constraint_names: + # for d in batch_info.batch_constraint_gradient.select( + # "control_name", constraint_name + # ).to_dicts(): + # constraints_gradient_dict[ + # f"gradient-{constraint_name}-{d['control_name']}" + # ] = d[constraint_name] + + constraints_dict = {} + if storage.data.nonlinear_constraints is not None: + for constraint in storage.data.nonlinear_constraints.to_dicts(): + # SEBA always just sets it to 1 for functions as a "convenience" + weight = 1 + + normalization = constraint["normalization"] + constraint_name = constraint["constraint_name"] + constraint_value = data[constraint_name].item() + + constraints_dict[constraint_name] = constraint_value + constraints_dict[f"{constraint_name}_norm"] = ( + constraint_value * normalization + ) + constraints_dict[f"{constraint_name}_weighted_norm"] = ( + constraint_value * weight * normalization + ) + + controls_dict = { + control_name: data[control_name].item() + for control_name in all_control_names + } + + obj_values_for_real = ( + batch_info.realization_objectives["realization", *all_objective_names] + .filter(polars.col("realization").eq(realization)) + .drop("realization") + .transpose() + .to_series() + .to_list() + ) + total_objective_value_for_real = sum(obj_values_for_real) / len( + obj_values_for_real + ) + + my_stuff = { + MetaDataColumnNames.BATCH: batch_info.batch_id, + MetaDataColumnNames.SIM_AVERAGED_OBJECTIVE: batch_info.batch_objectives[ + "total_objective_value" + ].item(), + MetaDataColumnNames.IS_GRADIENT: 0, # Q: get from everest config? + MetaDataColumnNames.REALIZATION: realization, + MetaDataColumnNames.SUCCESS: 1, # Q: is it always 1? + MetaDataColumnNames.REALIZATION_WEIGHT: storage.data.realization_weights.filter( + polars.col("realization") == realization + )["weight"].first(), + MetaDataColumnNames.SIMULATION: int(data["simulation_id"].item()), + **objectives_dict, + **controls_dict, + MetaDataColumnNames.REAL_AVERAGED_OBJECTIVE: total_objective_value_for_real, + MetaDataColumnNames.INCREASED_MERIT: batch_info.is_improvement, + **objectives_gradient_dict, + } + + metadata.append(my_stuff) return metadata diff --git a/tests/everest/test_api_snapshots.py b/tests/everest/test_api_snapshots.py index 2a51b6534b3..d15590c3688 100644 --- a/tests/everest/test_api_snapshots.py +++ b/tests/everest/test_api_snapshots.py @@ -53,15 +53,7 @@ def make_api_snapshot(api) -> dict[str, Any]: @pytest.mark.parametrize( "config_file", - [ - "config_advanced.yml", - "config_minimal.yml", - "config_multiobj.yml", - "config_auto_scaled_controls.yml", - "config_cvar.yml", - "config_discrete.yml", - "config_stddev.yml", - ], + ["config_advanced.yml", "config_minimal.yml", "config_multiobj.yml"], ) def test_api_snapshots(config_file, snapshot, cached_example): config_path, config_file, optimal_result_json = cached_example( @@ -79,6 +71,7 @@ def test_api_snapshots(config_file, snapshot, cached_example): .strip() + "\n" ) + snapshot.assert_match(snapshot_str, "snapshot.json") diff --git a/tests/everest/test_export.py b/tests/everest/test_export.py index 4686748df4a..e496f9a9923 100644 --- a/tests/everest/test_export.py +++ b/tests/everest/test_export.py @@ -70,9 +70,7 @@ def test_export_only_non_gradient_with_increased_merit(cached_example, snapshot) # Test that the default export functionality generated data frame # contains only non gradient simulations - snapshot.assert_match( - df.drop(["start_time", "end_time"], axis=1).round(4).to_csv(), "export.csv" - ) + snapshot.assert_match(df.round(4).to_csv(), "export.csv") def test_export_only_non_gradient(cached_example, snapshot): @@ -88,9 +86,7 @@ def test_export_only_non_gradient(cached_example, snapshot): data_file=config.model.data_file if config.model else None, ) - snapshot.assert_match( - df.drop(["start_time", "end_time"], axis=1).round(4).to_csv(), "export.csv" - ) + snapshot.assert_match(df.round(4).to_csv(), "export.csv") def test_export_only_increased_merit(cached_example, snapshot): @@ -107,7 +103,7 @@ def test_export_only_increased_merit(cached_example, snapshot): ) snapshot.assert_match( - df.drop(["start_time", "end_time"], axis=1).round(4).to_csv(), + df.round(4).to_csv(), "export.csv", ) @@ -125,9 +121,7 @@ def test_export_all_batches(cached_example, snapshot): data_file=config.model.data_file if config.model else None, ) - snapshot.assert_match( - df.drop(["start_time", "end_time"], axis=1).round(4).to_csv(), "export.csv" - ) + snapshot.assert_match(df.round(4).to_csv(), "export.csv") def test_export_only_give_batches(cached_example, snapshot): @@ -143,9 +137,7 @@ def test_export_only_give_batches(cached_example, snapshot): data_file=config.model.data_file if config.model else None, ) - snapshot.assert_match( - df.drop(["start_time", "end_time"], axis=1).round(4).to_csv(), "export.csv" - ) + snapshot.assert_match(df.round(4).to_csv(), "export.csv") def test_export_batches_progress(cached_example, snapshot): @@ -348,6 +340,4 @@ def test_export_gradients(cached_example, snapshot): data_file=config.model.data_file if config.model else None, ) - snapshot.assert_match( - df.drop(["start_time", "end_time"], axis=1).round(4).to_csv(), "export.csv" - ) + snapshot.assert_match(df.round(4).to_csv(), "export.csv")