Skip to content

Commit

Permalink
hot fix timeout
Browse files (browse the repository at this point in the history)
  • Loading branch information
idocx committed May 14, 2024
1 parent 792bf4e commit 616fcde
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions alab_management/resource_manager/resource_requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,9 +242,22 @@ def request_resources(
try:
result = f.result(timeout=timeout)
except concurrent.futures.TimeoutError as e:
raise CombinedTimeoutError(
f"Request {result.inserted_id} timed out after {timeout} seconds."
) from e
# if the request is not fulfilled, cancel it to make sure the resources are released
request = self._request_collection.find_one_and_update({
"_id": result.inserted_id,
"status": {"$ne": RequestStatus.FULFILLED.name}
}, {
"$set": {
"status": RequestStatus.CANCELED.name
}
})
if request is not None:
raise CombinedTimeoutError(
f"Request {result.inserted_id} timed out after {timeout} seconds."
) from e
else: # if the request is fulfilled, return the result normally, wrong timeout
result = f.result(timeout=None)

return {
**self._post_process_requested_resource(
devices=result["devices"],
Expand Down

0 comments on commit 616fcde

Please sign in to comment.