Skip to content

Commit

Permalink
handle stopped tasks without throwing error (#57)
Browse files — browse the repository at this point in the history
  • Loading branch information
mshannon-sil authored Nov 9, 2023
1 parent 2174c7d commit daa1b8e
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
7 changes: 5 additions & 2 deletions machine/jobs/build_nmt_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def run(args: dict) -> None:
task = Task.init()

def clearml_check_canceled() -> None:
if task.get_status() in {"stopped", "stopping"}:
if task.get_status() == "stopped":
raise CanceledError

check_canceled = clearml_check_canceled
Expand Down Expand Up @@ -72,7 +72,10 @@ def clearml_progress(status: ProgressStatus) -> None:
logger.info("Finished")
except Exception as e:
if task:
task.mark_failed(status_reason=type(e).__name__, status_message=str(e))
if task.get_status() == "stopped":
return
else:
task.mark_failed(status_reason=type(e).__name__, status_message=str(e))
raise e


Expand Down
8 changes: 4 additions & 4 deletions [second changed file; its path is missing from this capture]
Original file line number Diff line number Diff line change
Expand Up @@ -382,21 +382,21 @@ def __init__(
self._check_canceled = check_canceled

def on_train_begin(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs) -> None:
if self._check_canceled is not None:
self._check_canceled()

if self._progress is not None and state.is_local_process_zero:
self._progress(
ProgressStatus(0) if self._max_steps is None else ProgressStatus.from_step(0, self._max_steps)
)

def on_step_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs) -> None:
if self._check_canceled is not None:
self._check_canceled()

def on_step_end(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs) -> None:
if self._progress is not None and state.is_local_process_zero:
self._progress(
ProgressStatus(state.global_step)
if self._max_steps is None
else ProgressStatus.from_step(state.global_step, self._max_steps)
)

if self._check_canceled is not None:
self._check_canceled()

0 comments on commit daa1b8e

Please sign in to comment.