Skip to content

Commit

Permalink
resource requester timeout handling
Browse files Browse the repository at this point in the history
  • Loading branch information
bernardusrendy committed Apr 21, 2024
1 parent 2735ebb commit 624c5e7
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 16 deletions.
1 change: 0 additions & 1 deletion alab_management/lab_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ def request_resources(
request_id = result["request_id"]
timeout_error = result["timeout_error"]
if timeout_error:
self._resource_requester.release_resources(request_id=request_id)
raise TimeoutError
else:
devices = result["devices"]
Expand Down
33 changes: 20 additions & 13 deletions alab_management/task_manager/resource_requester.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,20 +228,27 @@ def request_resources(
]:
if timeout is not None and time.time() - start_time > timeout:
raise TimeoutError
time.sleep(0.5)
remaining_time = (
float(timeout - (time.time() - start_time)) if timeout else None
time.sleep(0.1)

result = f.result(timeout=None)
except (
TimeoutError
): # cancel the task if timeout, make sure it is not fulfilled
if (
self.get_request(_id, projection=["status"])["status"]
!= RequestStatus.FULFILLED.name
):
self.update_request_status(
request_id=_id, status=RequestStatus.CANCELED
)

result = f.result(timeout=remaining_time)
except TimeoutError: # cancel the task if timeout
self.update_request_status(request_id=_id, status=RequestStatus.CANCELED)
# wait for the request status to be updated
while (self.get_request(_id, projection=["status"]))[
"status"
] != RequestStatus.CANCELED.name:
time.sleep(0.5)
return {"request_id": _id, "timeout_error": True}
# wait for the request status to be updated
while (self.get_request(_id, projection=["status"]))[
"status"
] != RequestStatus.CANCELED.name:
time.sleep(0.5)
return {"request_id": _id, "timeout_error": True}
else: # if the request is fulfilled, return the result normally, wrong timeout
result = f.result(timeout=None)
return {
**self._post_process_requested_resource(
devices=result["devices"],
Expand Down
2 changes: 0 additions & 2 deletions alab_management/task_manager/task_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,8 +404,6 @@ def _handle_requested_resources(self, request_entry: dict[str, Any]):
]
== RequestStatus.ERROR.name
):
self._release_devices(devices)
self._release_sample_positions(sample_positions)
return
time.sleep(0.5)
# label the resources as occupied
Expand Down

0 comments on commit 624c5e7

Please sign in to comment.