From 7fc2079279c66e82a2d0008b3b7d8018cfbae5c8 Mon Sep 17 00:00:00 2001 From: Aleksandr Movchan Date: Thu, 31 Oct 2024 10:45:53 +0000 Subject: [PATCH 1/2] Update retryable_exceptions to include ActorDiedError and OutOfMemoryError --- aana/sdk.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/aana/sdk.py b/aana/sdk.py index a49317e6..ea9554f1 100644 --- a/aana/sdk.py +++ b/aana/sdk.py @@ -45,7 +45,7 @@ def __init__( name (str, optional): The name of the application. Defaults to "app". migration_func (Callable | None): The migration function to run. Defaults to None. retryable_exceptions (list[Exception, str] | None): The exceptions that can be retried in the task queue. - Defaults to ['InferenceException']. + Defaults to ['InferenceException', 'ActorDiedError', 'OutOfMemoryError']. """ self.name = name self.migration_func = migration_func @@ -53,7 +53,11 @@ def __init__( self.deployments: dict[str, Deployment] = {} if retryable_exceptions is None: - self.retryable_exceptions = [InferenceException] + self.retryable_exceptions = [ + "InferenceException", + "ActorDiedError", + "OutOfMemoryError", + ] else: self.retryable_exceptions = retryable_exceptions # Convert exceptions to string if they are not already From e4586473a8e5eb56d65f6f14cf863a0527b38e0c Mon Sep 17 00:00:00 2001 From: Aleksandr Movchan Date: Thu, 31 Oct 2024 10:53:11 +0000 Subject: [PATCH 2/2] Cap exponential backoff attempts to prevent overflow in sleep_exponential_backoff function --- aana/utils/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aana/utils/core.py b/aana/utils/core.py index 31d89236..6e27dea9 100644 --- a/aana/utils/core.py +++ b/aana/utils/core.py @@ -93,7 +93,9 @@ async def sleep_exponential_backoff( attempts (int): The number of attempts so far. jitter (bool): Whether to add jitter to the delay. Default is True. 
""" - delay = min(initial_delay * (2**attempts), max_delay) + # Prevent overflow by using min(attempts, 32) since 2**32 is already huge + capped_attempt = min(attempts, 32) + delay = min(initial_delay * (2**capped_attempt), max_delay) # Full jitter delay_with_jitter = random.uniform(0, delay) if jitter else delay # noqa: S311 await asyncio.sleep(delay_with_jitter)