From ff892b413bd49fe1e56c67be94c623f18ba03fd5 Mon Sep 17 00:00:00 2001 From: xjules Date: Mon, 14 Oct 2024 09:56:45 +0200 Subject: [PATCH] Add design matrix validation in ensemble experiment panel - Prefill active realization box with realizations from design matrix - Use design_matrix parameters in ensemble experiment - add test run cli with design matrix and poly example - add test that save parameters internalize DataFrame parameters in the storage --- src/ert/enkf_main.py | 29 ++++-- .../simulation/ensemble_experiment_panel.py | 9 +- src/ert/run_models/ensemble_experiment.py | 53 +++++++++-- .../cli/analysis/test_design_matrix.py | 93 +++++++++++++++++++ .../gui/simulation/test_run_dialog.py | 20 +++- tests/ert/unit_tests/test_libres_facade.py | 55 ++++++++++- 6 files changed, 239 insertions(+), 20 deletions(-) create mode 100644 tests/ert/ui_tests/cli/analysis/test_design_matrix.py diff --git a/src/ert/enkf_main.py b/src/ert/enkf_main.py index b139c294146..ac8b2dc096b 100644 --- a/src/ert/enkf_main.py +++ b/src/ert/enkf_main.py @@ -9,19 +9,16 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Mapping, Optional, Union import orjson +import xarray as xr from numpy.random import SeedSequence -from .config import ( - ExtParamConfig, - Field, - GenKwConfig, - ParameterConfig, - SurfaceConfig, -) +from .config import ExtParamConfig, Field, GenKwConfig, ParameterConfig, SurfaceConfig from .run_arg import RunArg from .runpaths import Runpaths if TYPE_CHECKING: + import pandas as pd + from .config import ErtConfig from .storage import Ensemble @@ -148,6 +145,24 @@ def _seed_sequence(seed: Optional[int]) -> int: return int_seed +def save_design_matrix_to_ensemble( + design_matrix_df: pd.DataFrame, + ensemble: Ensemble, + active_realizations: Iterable[int], +) -> None: + assert not design_matrix_df.empty + for realization_nr in active_realizations: + row = design_matrix_df.loc[realization_nr]["DESIGN_MATRIX"] + ds = xr.Dataset( + { + "values": ("names", 
list(row.values)), + "transformed_values": ("names", list(row.values)), + "names": list(row.keys()), + } + ) + ensemble.save_parameters("DESIGN_MATRIX", realization_nr, ds) + + def sample_prior( ensemble: Ensemble, active_realizations: Iterable[int], diff --git a/src/ert/gui/simulation/ensemble_experiment_panel.py b/src/ert/gui/simulation/ensemble_experiment_panel.py index dd2cf2e513e..417f97c057c 100644 --- a/src/ert/gui/simulation/ensemble_experiment_panel.py +++ b/src/ert/gui/simulation/ensemble_experiment_panel.py @@ -15,7 +15,7 @@ from ert.gui.tools.design_matrix.design_matrix_panel import DesignMatrixPanel from ert.mode_definitions import ENSEMBLE_EXPERIMENT_MODE from ert.run_models import EnsembleExperiment -from ert.validation import RangeStringArgument +from ert.validation import ActiveRange, RangeStringArgument from ert.validation.proper_name_argument import ExperimentValidation, ProperNameArgument from .experiment_config_panel import ExperimentConfigPanel @@ -85,6 +85,13 @@ def __init__( design_matrix = analysis_config.design_matrix if design_matrix is not None: + if design_matrix.design_matrix_df is None: + design_matrix.read_design_matrix() + + if design_matrix.active_realizations: + self._active_realizations_field.setText( + ActiveRange(design_matrix.active_realizations).rangestring + ) show_dm_param_button = QPushButton("Show parameters") show_dm_param_button.setObjectName("show-dm-parameters") show_dm_param_button.setMinimumWidth(50) diff --git a/src/ert/run_models/ensemble_experiment.py b/src/ert/run_models/ensemble_experiment.py index 25348477b8f..8125e78b01a 100644 --- a/src/ert/run_models/ensemble_experiment.py +++ b/src/ert/run_models/ensemble_experiment.py @@ -6,7 +6,7 @@ import numpy as np -from ert.enkf_main import sample_prior +from ert.enkf_main import sample_prior, save_design_matrix_to_ensemble from ert.ensemble_evaluator import EvaluatorServerConfig from ert.storage import Ensemble, Experiment, Storage @@ -61,7 +61,35 @@ def 
run_experiment( restart: bool = False, ) -> None: self.log_at_startup() - if not restart: + # If design matrix is present, we substitute the experiment parameters + # with those in the design matrix + if self.ert_config.analysis_config.design_matrix is not None: + if ( + self.ert_config.analysis_config.design_matrix.parameter_configuration + is None + ): + self.ert_config.analysis_config.design_matrix.read_design_matrix() + assert ( + self.ert_config.analysis_config.design_matrix.parameter_configuration + is not None + ) + parameters_config = [ + self.ert_config.analysis_config.design_matrix.parameter_configuration[ + "DESIGN_MATRIX" + ] + ] + self.experiment = self._storage.create_experiment( + name=self.experiment_name, + parameters=parameters_config, + observations=self.ert_config.observations, + responses=self.ert_config.ensemble_config.response_configuration, + ) + self.ensemble = self._storage.create_ensemble( + self.experiment, + name=self.ensemble_name, + ensemble_size=self.ensemble_size, + ) + elif not restart: self.experiment = self._storage.create_experiment( name=self.experiment_name, parameters=self.ert_config.ensemble_config.parameter_configuration, @@ -87,11 +115,22 @@ def run_experiment( np.array(self.active_realizations, dtype=bool), ensemble=self.ensemble, ) - sample_prior( - self.ensemble, - np.where(self.active_realizations)[0], - random_seed=self.random_seed, - ) + if ( + self.ert_config.analysis_config.design_matrix is not None + and self.ert_config.analysis_config.design_matrix.design_matrix_df + is not None + ): + save_design_matrix_to_ensemble( + self.ert_config.analysis_config.design_matrix.design_matrix_df, + self.ensemble, + np.where(self.active_realizations)[0], + ) + else: + sample_prior( + self.ensemble, + np.where(self.active_realizations)[0], + random_seed=self.random_seed, + ) self._evaluate_and_postprocess( run_args, diff --git a/tests/ert/ui_tests/cli/analysis/test_design_matrix.py 
b/tests/ert/ui_tests/cli/analysis/test_design_matrix.py new file mode 100644 index 00000000000..a0c994ef169 --- /dev/null +++ b/tests/ert/ui_tests/cli/analysis/test_design_matrix.py @@ -0,0 +1,93 @@ +import os +import stat +from textwrap import dedent + +import numpy as np +import pandas as pd +import pytest + +from ert.config import ErtConfig +from ert.mode_definitions import ENSEMBLE_EXPERIMENT_MODE +from ert.storage import open_storage +from tests.ert.ui_tests.cli.run_cli import run_cli + + +@pytest.mark.usefixtures("copy_poly_case") +def test_run_poly_example_with_design_matrix(): + design_matrix = "poly_design.xlsx" + num_realizations = 10 + a_values = list(range(num_realizations)) + design_matrix_df = pd.DataFrame( + { + "REAL": list(range(num_realizations)), + "a": a_values, + } + ) + default_sheet_df = pd.DataFrame([["b", 1], ["c", 2]]) + with pd.ExcelWriter(design_matrix) as xl_write: + design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01") + default_sheet_df.to_excel( + xl_write, index=False, sheet_name="DefaultSheet", header=False + ) + + with open("poly.ert", "w", encoding="utf-8") as fout: + fout.write( + dedent( + """\ + QUEUE_OPTION LOCAL MAX_RUNNING 10 + RUNPATH poly_out/realization-/iter- + NUM_REALIZATIONS 10 + MIN_REALIZATIONS 1 + GEN_DATA POLY_RES RESULT_FILE:poly.out + DESIGN_MATRIX poly_design.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet + INSTALL_JOB poly_eval POLY_EVAL + FORWARD_MODEL poly_eval + """ + ) + ) + + with open("poly_eval.py", "w", encoding="utf-8") as f: + f.write( + dedent( + """\ + #!/usr/bin/env python + import numpy as np + import sys + import json + + def _load_coeffs(filename): + with open(filename, encoding="utf-8") as f: + return json.load(f)["DESIGN_MATRIX"] + + def _evaluate(coeffs, x): + return coeffs["a"] * x**2 + coeffs["b"] * x + coeffs["c"] + + if __name__ == "__main__": + coeffs = _load_coeffs("parameters.json") + output = [_evaluate(coeffs, x) for x in range(10)] + with 
open("poly.out", "w", encoding="utf-8") as f: + f.write("\\n".join(map(str, output))) + """ + ) + ) + os.chmod( + "poly_eval.py", + os.stat("poly_eval.py").st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH, + ) + + run_cli( + ENSEMBLE_EXPERIMENT_MODE, + "--disable-monitor", + "poly.ert", + "--experiment-name", + "test-experiment", + ) + storage_path = ErtConfig.from_file("poly.ert").ens_path + with open_storage(storage_path) as storage: + experiment = storage.get_experiment_by_name("test-experiment") + params = experiment.get_ensemble_by_name("default").load_parameters( + "DESIGN_MATRIX" + )["values"] + np.testing.assert_array_equal(params[:, 0], a_values) + np.testing.assert_array_equal(params[:, 1], 10 * [1]) + np.testing.assert_array_equal(params[:, 2], 10 * [2]) diff --git a/tests/ert/unit_tests/gui/simulation/test_run_dialog.py b/tests/ert/unit_tests/gui/simulation/test_run_dialog.py index e8e520e8a83..1f6b24de19b 100644 --- a/tests/ert/unit_tests/gui/simulation/test_run_dialog.py +++ b/tests/ert/unit_tests/gui/simulation/test_run_dialog.py @@ -2,6 +2,7 @@ from queue import SimpleQueue from unittest.mock import MagicMock, Mock, patch +import pandas as pd import pytest from pytestqt.qtbot import QtBot from qtpy import QtWidgets @@ -719,15 +720,26 @@ def test_that_stdout_and_stderr_buttons_react_to_file_content( def test_that_design_matrix_show_parameters_button_is_visible( design_matrix_entry, qtbot: QtBot, storage ): - xls_filename = "design_matrix.xls" - with open(f"{xls_filename}", "w", encoding="utf-8"): - pass + xls_filename = "design_matrix.xlsx" + design_matrix_df = pd.DataFrame( + { + "REAL": list(range(3)), + "a": [0, 1, 2], + } + ) + default_sheet_df = pd.DataFrame([["b", 1], ["c", 2]]) + with pd.ExcelWriter(xls_filename) as xl_write: + design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01") + default_sheet_df.to_excel( + xl_write, index=False, sheet_name="DefaultSheet", header=False + ) + config_file = "minimal_config.ert" 
with open(config_file, "w", encoding="utf-8") as f: f.write("NUM_REALIZATIONS 1") if design_matrix_entry: f.write( - f"\nDESIGN_MATRIX {xls_filename} DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultValues" + f"\nDESIGN_MATRIX {xls_filename} DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet" ) args_mock = Mock() diff --git a/tests/ert/unit_tests/test_libres_facade.py b/tests/ert/unit_tests/test_libres_facade.py index be24bc0d8ae..55155765221 100644 --- a/tests/ert/unit_tests/test_libres_facade.py +++ b/tests/ert/unit_tests/test_libres_facade.py @@ -2,12 +2,15 @@ from datetime import datetime, timedelta from textwrap import dedent +import numpy as np import pytest +from pandas import ExcelWriter from pandas.core.frame import DataFrame from resdata.summary import Summary from ert.config import ErtConfig -from ert.enkf_main import sample_prior +from ert.config.design_matrix import DESIGN_MATRIX_GROUP, DesignMatrix +from ert.enkf_main import sample_prior, save_design_matrix_to_ensemble from ert.libres_facade import LibresFacade from ert.storage import open_storage @@ -241,3 +244,53 @@ def test_load_gen_kw_not_sorted(storage, tmpdir, snapshot): data = ensemble.load_all_gen_kw_data() snapshot.assert_match(data.round(12).to_csv(), "gen_kw_unsorted") + + +@pytest.mark.parametrize( + "reals, expect_error", + [ + pytest.param( + list(range(10)), + False, + id="correct_active_realizations", + ), + pytest.param([10, 11], True, id="incorrect_active_realizations"), + ], +) +def test_save_parameters_to_storage_from_design_dataframe( + tmp_path, reals, expect_error +): + design_path = tmp_path / "design_matrix.xlsx" + ensemble_size = 10 + a_values = np.random.default_rng().uniform(-5, 5, 10) + b_values = np.random.default_rng().uniform(-5, 5, 10) + c_values = np.random.default_rng().uniform(-5, 5, 10) + design_matrix_df = DataFrame({"a": a_values, "b": b_values, "c": c_values}) + with ExcelWriter(design_path) as xl_write: + design_matrix_df.to_excel(xl_write, index=False, 
sheet_name="DesignSheet01") + DataFrame().to_excel( + xl_write, index=False, sheet_name="DefaultValues", header=False + ) + design_matrix = DesignMatrix(design_path, "DesignSheet01", "DefaultValues") + design_matrix.read_design_matrix() + with open_storage(tmp_path / "storage", mode="w") as storage: + experiment_id = storage.create_experiment( + parameters=[design_matrix.parameter_configuration[DESIGN_MATRIX_GROUP]] + ) + ensemble = storage.create_ensemble( + experiment_id, name="default", ensemble_size=ensemble_size + ) + if expect_error: + with pytest.raises(KeyError): + save_design_matrix_to_ensemble( + design_matrix.design_matrix_df, ensemble, reals + ) + else: + save_design_matrix_to_ensemble( + design_matrix.design_matrix_df, ensemble, reals + ) + params = ensemble.load_parameters(DESIGN_MATRIX_GROUP)["values"] + all(params.names.values == ["a", "b", "c"]) + np.testing.assert_array_almost_equal(params[:, 0], a_values) + np.testing.assert_array_almost_equal(params[:, 1], b_values) + np.testing.assert_array_almost_equal(params[:, 2], c_values)