Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for multiple occurrences of design matrix #9583

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions src/ert/config/analysis_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def from_dict(cls, config_dict: ConfigDict) -> AnalysisConfig:

min_realization = min(min_realization, num_realization)

design_matrix_config_list = config_dict.get(ConfigKeys.DESIGN_MATRIX, None)
design_matrix_config_lists = config_dict.get(ConfigKeys.DESIGN_MATRIX, [])

options: dict[str, dict[str, Any]] = {"STD_ENKF": {}, "IES_ENKF": {}}
observation_settings: dict[str, Any] = {
Expand Down Expand Up @@ -187,16 +187,23 @@ def from_dict(cls, config_dict: ConfigDict) -> AnalysisConfig:
if all_errors:
raise ConfigValidationError.from_collected(all_errors)

design_matrices = [
DesignMatrix.from_config_list(design_matrix_config_list)
for design_matrix_config_list in design_matrix_config_lists
]
design_matrix: DesignMatrix | None = None
if design_matrices:
design_matrix = design_matrices[0]
for dm_other in design_matrices[1:]:
design_matrix.merge_with_other(dm_other)
config = cls(
max_runtime=config_dict.get(ConfigKeys.MAX_RUNTIME),
minimum_required_realizations=min_realization,
update_log_path=config_dict.get(ConfigKeys.UPDATE_LOG_PATH, "update_log"),
observation_settings=obs_settings,
es_module=es_settings,
ies_module=ies_settings,
design_matrix=DesignMatrix.from_config_list(design_matrix_config_list)
if design_matrix_config_list is not None
else None,
design_matrix=design_matrix,
)
return config

Expand Down
32 changes: 32 additions & 0 deletions src/ert/config/design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,38 @@ def from_config_list(cls, config_list: list[str]) -> DesignMatrix:
default_sheet=default_sheet,
)

def merge_with_other(self, dm_other: DesignMatrix) -> None:
    """Merge the columns and parameters of ``dm_other`` into this design matrix.

    Both matrices must have identical active realizations and must not
    share any column keys. All validation is done before anything on this
    instance is modified, so a failed merge leaves it untouched.

    Args:
        dm_other: Design matrix whose data frame columns and transform
            function definitions are appended to this one.

    Raises:
        ConfigValidationError: If the active realizations differ, if the
            two data frames share column keys, or if concatenating the
            data frames fails.
    """
    errors = []
    if self.active_realizations != dm_other.active_realizations:
        errors.append(
            ErrorInfo("Design Matrices don't have the same active realizations!")
        )

    common_keys = set(self.design_matrix_df.columns) & set(
        dm_other.design_matrix_df.columns
    )
    if common_keys:
        errors.append(
            ErrorInfo(f"Design Matrices do not have unique keys {common_keys}!")
        )

    # Report every validation problem at once, and do it before any state is
    # changed: pd.concat happily accepts duplicate column names, so merging
    # first would leave this instance corrupted when the error is raised.
    if errors:
        raise ConfigValidationError.from_collected(errors)

    pc_other = dm_other.parameter_configuration[DESIGN_MATRIX_GROUP]
    pc_self = self.parameter_configuration[DESIGN_MATRIX_GROUP]
    # Type narrowing only; the design matrix group is expected to be GEN_KW.
    assert isinstance(pc_other, GenKwConfig)
    assert isinstance(pc_self, GenKwConfig)

    try:
        merged_df = pd.concat(
            [self.design_matrix_df, dm_other.design_matrix_df], axis=1
        )
    except ValueError as exc:
        raise ConfigValidationError.from_collected(
            [ErrorInfo(f"Error when merging design matrices {exc}!")]
        ) from exc

    # Commit both pieces of state together once nothing can fail any more.
    self.design_matrix_df = merged_df
    pc_self.transform_function_definitions.extend(
        pc_other.transform_function_definitions
    )

def merge_with_existing_parameters(
self, existing_parameters: list[ParameterConfig]
) -> tuple[list[ParameterConfig], ParameterConfig | None]:
Expand Down
2 changes: 1 addition & 1 deletion src/ert/config/parsing/config_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ def design_matrix_keyword() -> SchemaItem:
SchemaItemType.STRING,
SchemaItemType.STRING,
],
multi_occurrence=False,
multi_occurrence=True,
)


Expand Down
142 changes: 117 additions & 25 deletions tests/ert/ui_tests/cli/analysis/test_design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,30 @@
from tests.ert.ui_tests.cli.run_cli import run_cli


def _create_design_matrix(filename, design_sheet_df, default_sheet_df=None):
    """Write an Excel design matrix file for a test case.

    The design sheet is always written to "DesignSheet01" without the index.
    When ``default_sheet_df`` is given, it is additionally written to
    "DefaultSheet" without index or header row.
    """
    with pd.ExcelWriter(filename) as writer:
        design_sheet_df.to_excel(writer, index=False, sheet_name="DesignSheet01")
        if default_sheet_df is None:
            # Leaving the ``with`` block still closes and saves the workbook.
            return
        default_sheet_df.to_excel(
            writer, index=False, sheet_name="DefaultSheet", header=False
        )


@pytest.mark.usefixtures("copy_poly_case")
def test_run_poly_example_with_design_matrix():
design_matrix = "poly_design.xlsx"
num_realizations = 10
a_values = list(range(num_realizations))
design_matrix_df = pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
"category": 5 * ["cat1"] + 5 * ["cat2"],
}
_create_design_matrix(
"poly_design.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
"category": 5 * ["cat1"] + 5 * ["cat2"],
}
),
pd.DataFrame([["b", 1], ["c", 2]]),
)
default_sheet_df = pd.DataFrame([["b", 1], ["c", 2]])
with pd.ExcelWriter(design_matrix) as xl_write:
design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01")
default_sheet_df.to_excel(
xl_write, index=False, sheet_name="DefaultSheet", header=False
)

with open("poly.ert", "w", encoding="utf-8") as fout:
fout.write(
Expand Down Expand Up @@ -105,21 +111,18 @@ def _evaluate(coeffs, x):
],
)
def test_run_poly_example_with_design_matrix_and_genkw_merge(default_values, error_msg):
design_matrix = "poly_design.xlsx"
num_realizations = 10
a_values = list(range(num_realizations))
design_matrix_df = pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
}
_create_design_matrix(
"poly_design.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
}
),
pd.DataFrame(default_values),
)
default_sheet_df = pd.DataFrame(default_values)
with pd.ExcelWriter(design_matrix) as xl_write:
design_matrix_df.to_excel(xl_write, index=False, sheet_name="DesignSheet01")
default_sheet_df.to_excel(
xl_write, index=False, sheet_name="DefaultSheet", header=False
)

with open("poly.ert", "w", encoding="utf-8") as fout:
fout.write(
Expand Down Expand Up @@ -191,3 +194,92 @@ def _evaluate(coeffs, x):
np.testing.assert_array_equal(params[:, 0], a_values)
np.testing.assert_array_equal(params[:, 1], 10 * [1])
np.testing.assert_array_equal(params[:, 2], 10 * [2])


@pytest.mark.usefixtures("copy_poly_case")
def test_run_poly_example_with_multiple_design_matrix_instances():
# End-to-end check that two DESIGN_MATRIX entries in one ert config are
# combined: after an ensemble experiment run, the stored "DESIGN_MATRIX"
# parameters must contain the values coming from both Excel files.
num_realizations = 10
a_values = list(range(num_realizations))
# First design matrix: per-realization column "a" (0..9) plus the
# defaults b=1 and c=2.
_create_design_matrix(
"poly_design_1.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"a": a_values,
}
),
pd.DataFrame([["b", 1], ["c", 2]]),
)
# Second design matrix: per-realization column "d" (constant 3) plus the
# default g=4. Its keys do not overlap with the first matrix.
_create_design_matrix(
"poly_design_2.xlsx",
pd.DataFrame(
{
"REAL": list(range(num_realizations)),
"d": num_realizations * [3],
}
),
pd.DataFrame([["g", 4]]),
)

# The config lists the DESIGN_MATRIX keyword twice, once per Excel file.
with open("poly.ert", "w", encoding="utf-8") as fout:
fout.write(
dedent(
"""\
QUEUE_OPTION LOCAL MAX_RUNNING 10
RUNPATH poly_out/realization-<IENS>/iter-<ITER>
NUM_REALIZATIONS 10
MIN_REALIZATIONS 1
GEN_DATA POLY_RES RESULT_FILE:poly.out
DESIGN_MATRIX poly_design_1.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet
DESIGN_MATRIX poly_design_2.xlsx DESIGN_SHEET:DesignSheet01 DEFAULT_SHEET:DefaultSheet
INSTALL_JOB poly_eval POLY_EVAL
FORWARD_MODEL poly_eval
"""
)
)

# Forward model script: reads the "DESIGN_MATRIX" group from
# parameters.json and evaluates the polynomial from a, b and c only
# (d and g are not used by the script itself).
with open("poly_eval.py", "w", encoding="utf-8") as f:
f.write(
dedent(
"""\
#!/usr/bin/env python
import json

def _load_coeffs(filename):
with open(filename, encoding="utf-8") as f:
return json.load(f)["DESIGN_MATRIX"]

def _evaluate(coeffs, x):
return coeffs["a"] * x**2 + coeffs["b"] * x + coeffs["c"]

if __name__ == "__main__":
coeffs = _load_coeffs("parameters.json")
output = [_evaluate(coeffs, x) for x in range(10)]
with open("poly.out", "w", encoding="utf-8") as f:
f.write("\\n".join(map(str, output)))
"""
)
)
# Add execute permission for user, group and others on the script.
os.chmod(
"poly_eval.py",
os.stat("poly_eval.py").st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH,
)

run_cli(
ENSEMBLE_EXPERIMENT_MODE,
"--disable-monitor",
"poly.ert",
"--experiment-name",
"test-experiment",
)
# Load the stored parameters of the "default" ensemble and compare them
# with the values written to the two design matrices.
storage_path = ErtConfig.from_file("poly.ert").ens_path
with open_storage(storage_path) as storage:
experiment = storage.get_experiment_by_name("test-experiment")
params = experiment.get_ensemble_by_name("default").load_parameters(
"DESIGN_MATRIX"
)["values"]
# Columns are checked positionally: 0 -> a, 1 -> b, 2 -> c (first matrix),
# 3 -> d, 4 -> g (second matrix).
np.testing.assert_array_equal(params[:, 0], a_values)
np.testing.assert_array_equal(params[:, 1], 10 * [1])
np.testing.assert_array_equal(params[:, 2], 10 * [2])
np.testing.assert_array_equal(params[:, 3], 10 * [3])
np.testing.assert_array_equal(params[:, 4], 10 * [4])
32 changes: 20 additions & 12 deletions tests/ert/unit_tests/config/test_analysis_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ def test_analysis_config_from_file_is_same_as_from_dict(monkeypatch, tmp_path):
("STD_ENKF", "ENKF_TRUNCATION", 0.8),
],
ConfigKeys.DESIGN_MATRIX: [
"my_design_matrix.xlsx",
"DESIGN_SHEET:my_sheet",
"DEFAULT_SHEET:my_default_sheet",
[
"my_design_matrix.xlsx",
"DESIGN_SHEET:my_sheet",
"DEFAULT_SHEET:my_default_sheet",
]
],
}
)
Expand Down Expand Up @@ -110,9 +112,11 @@ def test_invalid_design_matrix_format_raises_validation_error():
{
ConfigKeys.NUM_REALIZATIONS: 1,
ConfigKeys.DESIGN_MATRIX: [
"my_matrix.txt",
"DESIGN_SHEET:sheet1",
"DEFAULT_SHEET:sheet2",
[
"my_matrix.txt",
"DESIGN_SHEET:sheet1",
"DEFAULT_SHEET:sheet2",
],
],
}
)
Expand All @@ -123,9 +127,11 @@ def test_design_matrix_without_design_sheet_raises_validation_error():
AnalysisConfig.from_dict(
{
ConfigKeys.DESIGN_MATRIX: [
"my_matrix.xlsx",
"DESIGN_:design",
"DEFAULT_SHEET:default",
[
"my_matrix.xlsx",
"DESIGN_:design",
"DEFAULT_SHEET:default",
]
],
}
)
Expand All @@ -136,9 +142,11 @@ def test_design_matrix_without_default_sheet_raises_validation_error():
AnalysisConfig.from_dict(
{
ConfigKeys.DESIGN_MATRIX: [
"my_matrix.xlsx",
"DESIGN_SHEET:design",
"DEFAULT_:default",
[
"my_matrix.xlsx",
"DESIGN_SHEET:design",
"DEFAULT_:default",
]
],
}
)
Expand Down
73 changes: 73 additions & 0 deletions tests/ert/unit_tests/sensitivity_analysis/test_design_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,79 @@
from ert.config.gen_kw_config import GenKwConfig, TransformFunctionDefinition


def _create_design_matrix(xls_path, design_matrix_df, default_sheet_df) -> DesignMatrix:
    """Write both sheets to ``xls_path`` and return a DesignMatrix reading them.

    The design sheet goes to "DesignSheet01" (no index); the defaults go to
    "DefaultValues" (no index, no header row).
    """
    design_sheet, default_sheet = "DesignSheet01", "DefaultValues"
    with pd.ExcelWriter(xls_path) as writer:
        design_matrix_df.to_excel(writer, index=False, sheet_name=design_sheet)
        default_sheet_df.to_excel(
            writer, index=False, sheet_name=default_sheet, header=False
        )
    return DesignMatrix(xls_path, design_sheet, default_sheet)


@pytest.mark.parametrize(
    "design_sheet_pd, default_sheet_pd, error_msg",
    [
        pytest.param(
            pd.DataFrame(
                {
                    "REAL": [0, 1, 2],
                    "c": [1, 2, 3],
                    "d": [0, 2, 0],
                }
            ),
            pd.DataFrame([["e", 1]]),
            "",
            id="ok_merge",
        ),
        pytest.param(
            pd.DataFrame(
                {
                    "REAL": [0, 1, 2],
                    "a": [1, 2, 3],
                }
            ),
            pd.DataFrame([["e", 1]]),
            "Design Matrices do not have unique keys",
            id="not_unique_keys",
        ),
        pytest.param(
            pd.DataFrame(
                {
                    "REAL": [0, 1],
                    "d": [1, 2],
                }
            ),
            pd.DataFrame([["e", 1]]),
            "Design Matrices don't have the same active realizations!",
            id="not_same_active_realizations",
        ),
    ],
)
def test_merge_multiple_occurrences(
    tmp_path, design_sheet_pd, default_sheet_pd, error_msg
):
    """Merge a second design matrix into a fixed first one.

    The first matrix always has realizations 0-2 and the keys "a" and "b".
    An empty ``error_msg`` means the merge must succeed; otherwise the merge
    must raise a ``ValueError`` whose message matches ``error_msg``.
    """
    design_matrix_1 = _create_design_matrix(
        tmp_path / "design_matrix_1.xlsx",
        pd.DataFrame(
            {
                "REAL": [0, 1, 2],
                "a": [1, 2, 3],
                "b": [0, 2, 0],
            },
        ),
        pd.DataFrame([["a", 1], ["b", 4]]),
    )

    design_matrix_2 = _create_design_matrix(
        tmp_path / "design_matrix_2.xlsx", design_sheet_pd, default_sheet_pd
    )
    if error_msg:
        with pytest.raises(ValueError, match=error_msg):
            design_matrix_1.merge_with_other(design_matrix_2)
        return

    columns_before = set(design_matrix_1.design_matrix_df.columns)
    columns_other = set(design_matrix_2.design_matrix_df.columns)
    design_matrix_1.merge_with_other(design_matrix_2)
    # A successful merge must actually have an effect: the merged frame
    # holds exactly the columns of both inputs.
    assert set(design_matrix_1.design_matrix_df.columns) == (
        columns_before | columns_other
    )


@pytest.mark.parametrize(
"parameters, error_msg",
[
Expand Down
Loading