From 6e453e7a1788f76a9e3a049df69e3e3deb3048f6 Mon Sep 17 00:00:00 2001
From: Eivind Jahren
Date: Wed, 18 Dec 2024 09:33:31 +0100
Subject: [PATCH] Add fuzzing of update

---
 tests/ert/ui_tests/cli/test_update.py | 229 ++++++++++++++++++++++++++
 1 file changed, 229 insertions(+)
 create mode 100644 tests/ert/ui_tests/cli/test_update.py

diff --git a/tests/ert/ui_tests/cli/test_update.py b/tests/ert/ui_tests/cli/test_update.py
new file mode 100644
index 00000000000..a171b383d59
--- /dev/null
+++ b/tests/ert/ui_tests/cli/test_update.py
@@ -0,0 +1,229 @@
+from __future__ import annotations
+
+import os
+import stat
+from pathlib import Path
+
+import hypothesis.strategies as st
+import numpy as np
+import pytest
+from hypothesis import given, note, settings
+from pytest import MonkeyPatch, TempPathFactory
+
+from ert.cli.main import ErtCliError
+from ert.config.gen_kw_config import DISTRIBUTION_PARAMETERS
+from ert.mode_definitions import ENSEMBLE_SMOOTHER_MODE
+from ert.storage import open_storage
+
+from .run_cli import run_cli_with_pm
+
+names = st.text(
+    min_size=1,
+    max_size=8,
+    alphabet=st.characters(
+        min_codepoint=ord("!"),
+        max_codepoint=ord("~"),
+        exclude_characters="\"'$,:%",  # These have specific meaning in configs
+    ),
+)
+
+
+@st.composite
+def distribution_values(draw, k, vs):
+    d = {}
+    biggest = 100.0
+    if "LOG" in k:
+        biggest = 10.0
+    epsilon = biggest / 1000.0
+    if "MIN" in vs:
+        d["MIN"] = draw(st.floats(min_value=epsilon, max_value=biggest / 10.0))
+    if "MAX" in vs:
+        d["MAX"] = draw(st.floats(min_value=d["MIN"] + 5 * epsilon, max_value=biggest))
+    if "MEAN" in vs:
+        d["MEAN"] = draw(
+            st.floats(
+                min_value=d.get("MIN", 2 * epsilon) + epsilon,
+                max_value=d.get("MAX", biggest) - epsilon,
+            )
+        )
+    if "MODE" in vs:
+        d["MODE"] = draw(
+            st.floats(
+                min_value=d.get("MIN", 2 * epsilon) + epsilon,
+                max_value=d.get("MAX", biggest) - epsilon,
+            )
+        )
+    if "STEPS" in vs:
+        d["STEPS"] = draw(st.integers(min_value=2, max_value=10))
+    return [d.get(v, draw(st.floats(min_value=0.1, max_value=1.0))) for v in vs]
+
+
+distributions = st.one_of(
+    [
+        st.tuples(
+            st.just(k),
+            distribution_values(k, vs),
+        )
+        for k, vs in DISTRIBUTION_PARAMETERS.items()
+    ]
+)
+
+config_contents = """\
+NUM_REALIZATIONS {num_realizations}
+QUEUE_SYSTEM LOCAL
+QUEUE_OPTION LOCAL MAX_RUNNING {num_realizations}
+ENSPATH storage
+RANDOM_SEED 1234
+
+OBS_CONFIG observations
+GEN_KW COEFFS coeff_priors
+GEN_DATA POLY_RES RESULT_FILE:poly.out
+
+INSTALL_JOB poly_eval POLY_EVAL
+FORWARD_MODEL poly_eval
+
+ANALYSIS_SET_VAR OBSERVATIONS AUTO_SCALE *
+"""
+
+coeff_priors = """\
+coeff_0 {distribution0} 0 1
+coeff_1 {distribution1} 0 2
+coeff_2 {distribution2} 0 5
+"""
+
+observation = """
+GENERAL_OBSERVATION POLY_OBS_{i} {{
+    DATA = POLY_RES;
+    INDEX_FILE = index_{i}.txt;
+    OBS_FILE = poly_obs_{i}.txt;
+}};
+"""
+
+poly_eval = """\
+#!/usr/bin/env python3
+import json
+import numpy as np
+coeffs = json.load(open("parameters.json"))["COEFFS"]
+c = [np.array(coeffs["coeff_" + str(i)]) for i in range(len(coeffs))]
+with open("poly.out", "w", encoding="utf-8") as f:
+    f.write("\\n".join(map(str, [np.polyval(c, x) for x in range({num_points})])))
+"""
+
+POLY_EVAL = "EXECUTABLE poly_eval.py"
+
+
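+# Property-based test: draw random GEN_KW priors, observation values/errors and
+# index groups for the polynomial forward model, run the ensemble smoother via
+# the CLI, and check that the update either lowers the generalized variance
+# (determinant of the parameter covariance) or fails with "No active
+# observations".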
+@pytest.mark.timeout(None)
+@settings(max_examples=1000)
+@given(
+    num_realizations=st.integers(min_value=20, max_value=40),
+    num_points=st.integers(min_value=1, max_value=20),
+    distributions=st.lists(distributions, min_size=1, max_size=10),
+    data=st.data(),
+)
+def test_update_lowers_generalized_variance_or_deactivates_observations(
+    tmp_path_factory: TempPathFactory,
+    num_realizations: int,
+    num_points: int,
+    distributions: list[tuple[str, list[float]]],
+    data,
+):
+    indices = data.draw(
+        st.lists(
+            st.integers(min_value=0, max_value=num_points - 1),
+            min_size=1,
+            max_size=num_points,
+            unique=True,
+        )
+    )
+    values = data.draw(
+        st.lists(
+            st.floats(min_value=-10.0, max_value=10.0),
+            min_size=len(indices),
+            max_size=len(indices),
+        )
+    )
+    errs = data.draw(
+        st.lists(
+            st.floats(min_value=0.1, max_value=0.5),
+            min_size=len(indices),
+            max_size=len(indices),
+        )
+    )
+    num_groups = data.draw(st.integers(min_value=1, max_value=num_points))
+    per_group = num_points // num_groups
+    note(f"{num_groups=} {num_points=} {per_group=}")
+
+    tmp_path = tmp_path_factory.mktemp("parameter_example")
+    note(f"Running in directory {tmp_path}")
+    with MonkeyPatch.context() as patch:
+        patch.chdir(tmp_path)
+        contents = config_contents.format(
+            num_realizations=num_realizations,
+        )
+        note(f"config file: {contents}")
+        Path("config.ert").write_text(contents, encoding="utf-8")
+        py = Path("poly_eval.py")
+        py.write_text(poly_eval.format(num_points=num_points), encoding="utf-8")
+        mode = os.stat(py)
+        os.chmod(py, mode.st_mode | stat.S_IEXEC)
+
+        for i in range(num_groups):
+            note(f"group {i}: entries {i * per_group} to {(i + 1) * per_group}")
+            with open("observations", mode="a", encoding="utf-8") as f:
+                f.write(observation.format(i=i))
+            Path(f"poly_obs_{i}.txt").write_text(
+                "\n".join(
+                    f"{x} {y}"
+                    for x, y in zip(
+                        values[i * per_group : (i + 1) * per_group],
+                        errs[i * per_group : (i + 1) * per_group],
+                        strict=False,
+                    )
+                ),
+                encoding="utf-8",
+            )
+            Path(f"index_{i}.txt").write_text(
+                "\n".join(
+                    f"{x}" for x in indices[i * per_group : (i + 1) * per_group]
+                ),
+                encoding="utf-8",
+            )
+
+        Path("coeff_priors").write_text(
+            "\n".join(
+                f"coeff_{i} {d} {' '.join(str(p) for p in v)}"
+                for i, (d, v) in enumerate(distributions)
+            ),
+            encoding="utf-8",
+        )
+        Path("POLY_EVAL").write_text(POLY_EVAL, encoding="utf-8")
+
+        success = True
+        try:
+            run_cli_with_pm(
+                [
+                    ENSEMBLE_SMOOTHER_MODE,
+                    "--disable-monitor",
+                    "--experiment-name",
+                    "experiment",
+                    "config.ert",
+                ]
+            )
+        except ErtCliError as err:
+            success = False
+            assert "No active observations" in str(
+                err
+            )  # or "Matrix is singular" in str(err)
+
+        if success:
+            with open_storage("storage") as storage:
+                experiment = storage.get_experiment_by_name("experiment")
+                prior = experiment.get_ensemble_by_name("iter-0").load_all_gen_kw_data()
+                posterior = experiment.get_ensemble_by_name(
+                    "iter-1"
+                ).load_all_gen_kw_data()
+
+                assert (
+                    np.linalg.det(posterior.cov().to_numpy())
+                    <= np.linalg.det(prior.cov().to_numpy()) + 0.001
+                )