From a161f0e68c9ddd0ecb7e2503339a772f7fd04d54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Berland?= Date: Thu, 24 Oct 2024 15:22:20 +0200 Subject: [PATCH] Decrease LSF polling rates This is to mitigate temporal bottleneck issues with the LSF grid server. --- src/ert/scheduler/lsf_driver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ert/scheduler/lsf_driver.py b/src/ert/scheduler/lsf_driver.py index 7c7e364a2dc..264f95df0c4 100644 --- a/src/ert/scheduler/lsf_driver.py +++ b/src/ert/scheduler/lsf_driver.py @@ -30,7 +30,7 @@ from .driver import SIGNAL_OFFSET, Driver from .event import Event, FinishedEvent, StartedEvent -_POLL_PERIOD = 2.0 # seconds +_POLL_PERIOD = 4.0 # seconds LSF_FAILED_JOB = SIGNAL_OFFSET + 65 # first non signal returncode """Return code we use when lsf reports failed jobs""" @@ -263,8 +263,8 @@ def __init__( self._iens2jobid: MutableMapping[int, str] = {} self._max_attempt: int = 100 self._sleep_time_between_bkills = 30 - self._sleep_time_between_cmd_retries = 3 - self._bsub_retries = 10 + self._sleep_time_between_cmd_retries = 6 + self._bsub_retries = 20 self._poll_period = _POLL_PERIOD