Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix precision loss issue in calculation of standard deviation. #9625

Merged
merged 2 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ert/analysis/_es_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def _load_observations_and_responses(

# Identifies non-outlier observations based on responses.
ens_mean = S.mean(axis=1)
ens_std = S.std(ddof=0, axis=1)
ens_std = S.std(ddof=0, axis=1, dtype=np.float64).astype(np.float32)
ens_std_mask = ens_std > std_cutoff
ens_mean_mask = abs(observations - ens_mean) <= alpha * (ens_std + scaled_errors)
obs_mask = np.logical_and(ens_mean_mask, ens_std_mask)
Expand Down
241 changes: 241 additions & 0 deletions tests/ert/ui_tests/cli/test_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
from __future__ import annotations

import os
import stat
import warnings
from pathlib import Path

import hypothesis.strategies as st
import numpy as np
from hypothesis import given, note, settings
from pytest import MonkeyPatch, TempPathFactory

from ert.cli.main import ErtCliError
from ert.config.gen_kw_config import DISTRIBUTION_PARAMETERS
from ert.mode_definitions import ENSEMBLE_SMOOTHER_MODE
from ert.storage import open_storage

from .run_cli import run_cli_with_pm

# Strategy for short, identifier-like names built from printable ASCII,
# excluding characters that carry special meaning in ert config files.
_name_alphabet = st.characters(
    min_codepoint=ord("!"),
    max_codepoint=ord("~"),
    exclude_characters="\"'$,:%",  # These have specific meaning in configs
)
names = st.text(min_size=1, max_size=8, alphabet=_name_alphabet)


@st.composite
def distribution_values(draw, k, vs):
    """Draw a plausible parameter list for the distribution named *k*.

    *vs* is the ordered list of parameter names the distribution expects
    (e.g. ["MIN", "MAX"]); the return value is a list of drawn values in
    that same order. Interdependent bounds (MIN < MAX, MIN < MEAN < MAX,
    MIN < MODE < MAX) are kept consistent by separating them with a small
    epsilon margin so downstream config validation does not reject them.
    """
    d = {}
    # LOG-type distributions work on a much smaller scale to avoid
    # overflow/underflow when exponentiating.
    biggest = 100.0
    if "LOG" in k:
        biggest = 10.0
    epsilon = biggest / 1000.0
    if "MIN" in vs:
        d["MIN"] = draw(st.floats(min_value=epsilon, max_value=biggest / 10.0))
    if "MAX" in vs:
        # Fall back to `epsilon` when the distribution declares MAX without
        # MIN; indexing d["MIN"] directly would raise KeyError in that case
        # (the MEAN/MODE branches below already use d.get for this reason).
        d["MAX"] = draw(
            st.floats(min_value=d.get("MIN", epsilon) + 5 * epsilon, max_value=biggest)
        )
    if "MEAN" in vs:
        d["MEAN"] = draw(
            st.floats(
                min_value=d.get("MIN", 2 * epsilon) + epsilon,
                max_value=d.get("MAX", biggest) - epsilon,
            )
        )
    if "MODE" in vs:
        d["MODE"] = draw(
            st.floats(
                min_value=d.get("MIN", 2 * epsilon) + epsilon,
                max_value=d.get("MAX", biggest) - epsilon,
            )
        )
    if "STEPS" in vs:
        d["STEPS"] = draw(st.integers(min_value=2, max_value=10))
    # Any parameter without a dedicated rule gets a generic small positive value.
    return [d.get(v, draw(st.floats(min_value=0.1, max_value=1.0))) for v in vs]


# One (name, parameter-values) tuple strategy per distribution known to
# GEN_KW; `distributions` picks among them.
_per_distribution = [
    st.tuples(
        st.just(dist_name),
        distribution_values(dist_name, param_names),
    )
    for dist_name, param_names in DISTRIBUTION_PARAMETERS.items()
]
distributions = st.one_of(_per_distribution)

config_contents = """\
NUM_REALIZATIONS {num_realizations}
QUEUE_SYSTEM LOCAL
QUEUE_OPTION LOCAL MAX_RUNNING {num_realizations}
ENSPATH storage
RANDOM_SEED 1234

OBS_CONFIG observations
GEN_KW COEFFS coeff_priors
GEN_DATA POLY_RES RESULT_FILE:poly.out

INSTALL_JOB poly_eval POLY_EVAL
FORWARD_MODEL poly_eval

ANALYSIS_SET_VAR OBSERVATIONS AUTO_SCALE *
"""

coeff_priors = """\
coeff_0 {distribution0} 0 1
coeff_1 {distribution1} 0 2
coeff_2 {distribution2} 0 5
"""

observation = """
GENERAL_OBSERVATION POLY_OBS_{i} {{
DATA = POLY_RES;
INDEX_FILE = index_{i}.txt;
OBS_FILE = poly_obs_{i}.txt;
}};
"""

poly_eval = """\
#!/usr/bin/env python3
import json
import numpy as np
coeffs = json.load(open("parameters.json"))["COEFFS"]
c = [np.array(coeffs[f"coeff_" + str(i)]) for i in range(len(coeffs))]
with open("poly.out", "w", encoding="utf-8") as f:
f.write("\\n".join(map(str, [np.polyval(c, x) for x in range({num_points})])))
"""

POLY_EVAL = "EXECUTABLE poly_eval.py"


@settings(max_examples=10)
@given(
    num_realizations=st.integers(min_value=20, max_value=40),
    num_points=st.integers(min_value=1, max_value=20),
    distributions=st.lists(distributions, min_size=1, max_size=10),
    data=st.data(),
)
def test_update_lowers_generalized_variance_or_deactives_observations(
    tmp_path_factory: TempPathFactory,
    num_realizations: int,
    num_points: int,
    distributions: list[tuple[str, list[float]]],
    data,
):
    """Property-based end-to-end test of the ensemble smoother CLI.

    For randomly drawn observations, errors and priors, either the run fails
    for one of a small set of known/accepted reasons, or the update must not
    increase the generalized variance (determinant of the parameter
    covariance) from prior to posterior.
    """
    # Observation indices into the polynomial response (unique, so each
    # response point is observed at most once).
    indecies = data.draw(
        st.lists(
            st.integers(min_value=0, max_value=num_points - 1),
            min_size=1,
            max_size=num_points,
            unique=True,
        )
    )
    # Observed values and their standard errors, one per drawn index.
    values = data.draw(
        st.lists(
            st.floats(min_value=-10.0, max_value=10.0),
            min_size=len(indecies),
            max_size=len(indecies),
        )
    )
    errs = data.draw(
        st.lists(
            st.floats(min_value=0.1, max_value=0.5),
            min_size=len(indecies),
            max_size=len(indecies),
        )
    )
    # Observations are split into `num_groups` GENERAL_OBSERVATION entries of
    # `per_group` points each.  NOTE(review): slicing below uses num_points //
    # num_groups over `indecies`, which may be shorter than num_points, so some
    # groups can end up empty — presumably intentional; strict=False in the zip
    # tolerates length mismatches.
    num_groups = data.draw(st.integers(min_value=1, max_value=num_points))
    per_group = num_points // num_groups
    print(num_groups, num_points, per_group)

    tmp_path = tmp_path_factory.mktemp("parameter_example")
    note(f"Running in directory {tmp_path}")
    with MonkeyPatch.context() as patch:
        patch.chdir(tmp_path)
        contents = config_contents.format(
            num_realizations=num_realizations,
        )
        note(f"config file: {contents}")
        Path("config.ert").write_text(contents, encoding="utf-8")
        py = Path("poly_eval.py")
        py.write_text(poly_eval.format(num_points=num_points))
        # Make the forward-model script executable.
        mode = os.stat(py)
        os.chmod(py, mode.st_mode | stat.S_IEXEC)

        # Write one observation config entry plus its value and index files
        # per group.
        for i in range(num_groups):
            print(f"{i * per_group } {(i + 1) * per_group}")
            with open("observations", mode="a", encoding="utf-8") as f:
                f.write(observation.format(i=i))
            Path(f"poly_obs_{i}.txt").write_text(
                "\n".join(
                    f"{x} {y}"
                    for x, y in zip(
                        values[i * per_group : (i + 1) * per_group],
                        errs[i * per_group : (i + 1) * per_group],
                        strict=False,
                    )
                ),
                encoding="utf-8",
            )
            Path(f"index_{i}.txt").write_text(
                "\n".join(
                    f"{x}" for x in indecies[i * per_group : (i + 1) * per_group]
                ),
                encoding="utf-8",
            )

        # One prior line per drawn distribution.
        Path("coeff_priors").write_text(
            "\n".join(
                f"coeff_{i} {d} {' '.join(str(p) for p in v)}"
                for i, (d, v) in enumerate(distributions)
            ),
            encoding="utf-8",
        )
        Path("POLY_EVAL").write_text(POLY_EVAL, encoding="utf-8")

        success = True
        with warnings.catch_warnings(record=True) as all_warnings:
            warnings.simplefilter("always")
            try:
                run_cli_with_pm(
                    [
                        ENSEMBLE_SMOOTHER_MODE,
                        "--disable-monitor",
                        "--experiment-name",
                        "experiment",
                        "config.ert",
                    ]
                )
            except ErtCliError as err:
                success = False
                se = str(err)
                # "No active observations" is expected
                # when std deviation is too low, the
                # other errors are discussed here:
                # https://github.com/equinor/ert/issues/9581
                # https://github.com/equinor/ert/issues/9585
                assert (
                    "No active observations" in se
                    or "math domain error" in se
                    or "math range error" in se
                )

        # Any warning other than "Ill-conditioned matrix" means the run is
        # not trusted enough to assert on the variance property.
        if any("Ill-conditioned matrix" not in str(w.message) for w in all_warnings):
            success = False

        if success:
            with open_storage("storage") as storage:
                experiment = storage.get_experiment_by_name("experiment")
                prior = experiment.get_ensemble_by_name("iter-0").load_all_gen_kw_data()
                posterior = experiment.get_ensemble_by_name(
                    "iter-1"
                ).load_all_gen_kw_data()

                # The determinant of the covariance (generalized variance)
                # must not grow; 0.001 allows for numerical noise.
                assert (
                    np.linalg.det(posterior.cov().to_numpy())
                    <= np.linalg.det(prior.cov().to_numpy()) + 0.001
                )
124 changes: 124 additions & 0 deletions tests/ert/unit_tests/analysis/test_es_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from ert.config.analysis_module import ESSettings, IESSettings
from ert.config.gen_kw_config import TransformFunctionDefinition
from ert.field_utils import Shape
from ert.storage import open_storage


@pytest.fixture
Expand Down Expand Up @@ -204,6 +205,129 @@ def test_update_report_with_different_observation_status_from_smoother_update(
assert data_section.extra["Deactivated observations - outliers"] == str(outliers)


def test_update_handles_precision_loss_in_std_dev(tmp_path):
    """
    This is a regression test for precision loss in calculating
    standard deviation.

    With float32 responses whose first index is constant across
    realizations, a float32 std computation can yield a small non-zero
    value; the fix computes the std in float64. The test asserts that
    exactly one observation is deactivated by the std cutoff.
    """
    # Single CONST parameter; update=True so the smoother has something
    # to update.
    gen_kw = GenKwConfig(
        name="COEFFS",
        forward_init=False,
        update=True,
        template_file=None,
        output_file=None,
        transform_function_definitions=[
            TransformFunctionDefinition(
                name="coeff_0", param_name="CONST", values=["0.1"]
            ),
        ],
    )
    # The values given here are chosen so that when computing
    # `ens_std = S.std(ddof=0, axis=1)`, ens_std[0] is not zero even though
    # all responses have the same value: 5.08078746e07.
    # This is due to precision loss.
    with open_storage(tmp_path, mode="w") as storage:
        experiment = storage.create_experiment(
            name="ensemble_smoother",
            parameters=[gen_kw],
            observations={
                "gen_data": polars.DataFrame(
                    {
                        "response_key": "RES",
                        "observation_key": "OBS",
                        "report_step": polars.Series(np.zeros(3), dtype=polars.UInt16),
                        "index": polars.Series([0, 1, 2], dtype=polars.UInt16),
                        "observations": polars.Series(
                            [-218285263.28648496, -999999999.0, -107098474.0148249],
                            dtype=polars.Float32,
                        ),
                        "std": polars.Series(
                            [559437122.6211826, 999999999.9999999, 1.9],
                            dtype=polars.Float32,
                        ),
                    }
                )
            },
            responses=[
                GenDataConfig(
                    name="gen_data",
                    input_files=["poly.out"],
                    keys=["RES"],
                    has_finalized_keys=True,
                    report_steps_list=[None],
                )
            ],
        )
        prior = storage.create_ensemble(experiment.id, ensemble_size=23, name="prior")
        # Sample the prior parameter for every realization.
        for realization_nr in range(prior.ensemble_size):
            ds = gen_kw.sample_or_load(
                realization_nr,
                random_seed=1234,
                ensemble_size=prior.ensemble_size,
            )
            prior.save_parameters("COEFFS", realization_nr, ds)

        # Realization 0 gets one set of response values ...
        prior.save_response(
            "gen_data",
            polars.DataFrame(
                {
                    "response_key": "RES",
                    "report_step": polars.Series(np.zeros(3), dtype=polars.UInt16),
                    "index": polars.Series(np.arange(3), dtype=polars.UInt16),
                    "values": polars.Series(
                        np.array([5.08078746e07, 4.07677769e10, 2.28002538e12]),
                        dtype=polars.Float32,
                    ),
                }
            ),
            0,
        )
        # ... all other realizations share a second set. The first value
        # (5.08078744e07) rounds to the same float32 as realization 0's
        # 5.08078746e07, so index 0 is effectively constant across the
        # ensemble.
        for i in range(1, prior.ensemble_size):
            prior.save_response(
                "gen_data",
                polars.DataFrame(
                    {
                        "response_key": "RES",
                        "report_step": polars.Series(np.zeros(3), dtype=polars.UInt16),
                        "index": polars.Series(np.arange(3), dtype=polars.UInt16),
                        "values": polars.Series(
                            np.array([5.08078744e07, 4.12422210e09, 1.26490794e10]),
                            dtype=polars.Float32,
                        ),
                    }
                ),
                i,
            )

        posterior = storage.create_ensemble(
            prior.experiment_id,
            ensemble_size=prior.ensemble_size,
            iteration=1,
            name="posterior",
            prior_ensemble=prior,
        )
        events = []

        ss = smoother_update(
            prior,
            posterior,
            experiment.observation_keys,
            ["COEFFS"],
            UpdateSettings(auto_scale_observations=[["OBS*"]]),
            ESSettings(),
            progress_callback=events.append,
        )

        # Exactly the constant-response observation must be deactivated by
        # the std cutoff; with float32 precision loss the std would be
        # non-zero and the observation would (wrongly) stay active.
        assert (
            sum(
                1
                for e in ss.update_step_snapshots
                if e.status == ObservationStatus.STD_CUTOFF
            )
            == 1
        )


@pytest.mark.parametrize(
"module, expected_gen_kw",
[
Expand Down
Loading