From b39a7832f1a53c2a03ec5fb2c973ba2cfa605f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Berland?= Date: Wed, 30 Oct 2024 15:36:08 +0100 Subject: [PATCH 1/2] Change event names from job to step --- src/_ert/events.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/_ert/events.py b/src/_ert/events.py index 99033c7e833..418de54b104 100644 --- a/src/_ert/events.py +++ b/src/_ert/events.py @@ -11,16 +11,16 @@ class Id: - FORWARD_MODEL_STEP_START_TYPE = Literal["forward_model_job.start"] - FORWARD_MODEL_STEP_RUNNING_TYPE = Literal["forward_model_job.running"] - FORWARD_MODEL_STEP_SUCCESS_TYPE = Literal["forward_model_job.success"] - FORWARD_MODEL_STEP_FAILURE_TYPE = Literal["forward_model_job.failure"] - FORWARD_MODEL_STEP_CHECKSUM_TYPE = Literal["forward_model_job.checksum"] - FORWARD_MODEL_STEP_START: Final = "forward_model_job.start" - FORWARD_MODEL_STEP_RUNNING: Final = "forward_model_job.running" - FORWARD_MODEL_STEP_SUCCESS: Final = "forward_model_job.success" - FORWARD_MODEL_STEP_FAILURE: Final = "forward_model_job.failure" - FORWARD_MODEL_STEP_CHECKSUM: Final = "forward_model_job.checksum" + FORWARD_MODEL_STEP_START_TYPE = Literal["forward_model_step.start"] + FORWARD_MODEL_STEP_RUNNING_TYPE = Literal["forward_model_step.running"] + FORWARD_MODEL_STEP_SUCCESS_TYPE = Literal["forward_model_step.success"] + FORWARD_MODEL_STEP_FAILURE_TYPE = Literal["forward_model_step.failure"] + FORWARD_MODEL_STEP_CHECKSUM_TYPE = Literal["forward_model_step.checksum"] + FORWARD_MODEL_STEP_START: Final = "forward_model_step.start" + FORWARD_MODEL_STEP_RUNNING: Final = "forward_model_step.running" + FORWARD_MODEL_STEP_SUCCESS: Final = "forward_model_step.success" + FORWARD_MODEL_STEP_FAILURE: Final = "forward_model_step.failure" + FORWARD_MODEL_STEP_CHECKSUM: Final = "forward_model_step.checksum" REALIZATION_FAILURE_TYPE = Literal["realization.failure"] REALIZATION_PENDING_TYPE = Literal["realization.pending"] From 680024ed6edccc0deaa53c422c48fc869765bd4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Berland?= Date: Wed, 30 Oct 2024 15:45:21 +0100 Subject: [PATCH 2/2] Log runtime for individual forward model steps This is meant for making statistics pr forward model step name. While testing on poly_example and local queue, there is some odd occurences of the logged message that start_time is None, this is not believed to happen often in production. --- src/ert/ensemble_evaluator/snapshot.py | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/ert/ensemble_evaluator/snapshot.py b/src/ert/ensemble_evaluator/snapshot.py index 35e0157944f..fe181be196d 100644 --- a/src/ert/ensemble_evaluator/snapshot.py +++ b/src/ert/ensemble_evaluator/snapshot.py @@ -1,3 +1,4 @@ +import logging import sys import typing from collections import defaultdict @@ -46,6 +47,8 @@ from ert.ensemble_evaluator import identifiers as ids from ert.ensemble_evaluator import state +logger = logging.getLogger(__name__) + if sys.version_info < (3, 11): from backports.datetime_fromisoformat import MonkeyPatch # type: ignore @@ -327,6 +330,39 @@ def update_from_event( start_time = convert_iso8601_to_datetime(timestamp) elif e_type in {ForwardModelStepSuccess, ForwardModelStepFailure}: end_time = convert_iso8601_to_datetime(timestamp) + + try: + start_time = self._fm_step_snapshots[event.real, event.fm_step].get( + "start_time" + ) + cpu_seconds = self._fm_step_snapshots[ + event.real, event.fm_step + ].get("cpu_seconds") + fm_step_name = source_snapshot.reals[event.real]["fm_steps"][ + event.fm_step + ]["name"] + if start_time is not None: + logger.warning( + f"{event.event_type} {fm_step_name} " + f"walltime={(end_time - start_time).total_seconds()} " + f"cputime={cpu_seconds} " + f"ensemble={event.ensemble} " + f"step_index={event.fm_step} " + f"real={event.real}" + ) + else: + logger.warning( + f"Should log fm_step runtime, but start_time was None, " + f"{event.event_type} {fm_step_name=} " + f"endtime={end_time.isoformat()} " + f"cputime={cpu_seconds} " + f"ensemble={event.ensemble} " + f"step_index={event.fm_step} " + f"real={event.real}" + ) + except BaseException as e: + logger.warning(f"Should log fm_step runtime, but got exception {e}") + if type(event) is ForwardModelStepFailure: error = event.error_msg if event.error_msg else "" else: