Summary of this patch (text before the first "diff --git" header is ignored by patch tools):
  * CHANGELOG.md: adds the 0.13.4 entry describing the fix.
  * neptune/new/envs.py: adds the NEPTUNE_SUBPROCESS_KILL_TIMEOUT env-var name.
  * neptune/new/internal/utils/process_killer.py: KILL_TIMEOUT is read from that
    env var (default "5"); kill_me() now terminates the child processes, waits
    up to KILL_TIMEOUT for them, kills the ones still alive, and only then
    terminates the current process (previously the current process was part of
    the same terminate/wait list as its children).
NOTE(review): KILL_TIMEOUT is parsed with int() at module import time, so a
non-integer value in NEPTUNE_SUBPROCESS_KILL_TIMEOUT raises ValueError when
process_killer is imported - confirm this is acceptable.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0a0e1f175..f7206dd05 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+## neptune-client 0.13.4
+
+### Fixes
+- Fix issue that prevented waiting for subprocesses to finish after receiving stop signal from backend ([#774](https://github.com/neptune-ai/neptune-client/pull/774));
+  Timeout now overridable using environment var `NEPTUNE_SUBPROCESS_KILL_TIMEOUT`
+
 ## neptune-client 0.13.3
 
 ### Fixes
diff --git a/neptune/new/envs.py b/neptune/new/envs.py
index 80f56e619..7718d59a4 100644
--- a/neptune/new/envs.py
+++ b/neptune/new/envs.py
@@ -32,3 +32,5 @@
 NEPTUNE_RETRIES_TIMEOUT_ENV = "NEPTUNE_RETRIES_TIMEOUT"
 
 NEPTUNE_SYNC_BATCH_TIMEOUT_ENV = "NEPTUNE_SYNC_BATCH_TIMEOUT"
+
+NEPTUNE_SUBPROCESS_KILL_TIMEOUT = "NEPTUNE_SUBPROCESS_KILL_TIMEOUT"
diff --git a/neptune/new/internal/utils/process_killer.py b/neptune/new/internal/utils/process_killer.py
index d82b5df20..5106d8021 100644
--- a/neptune/new/internal/utils/process_killer.py
+++ b/neptune/new/internal/utils/process_killer.py
@@ -17,6 +17,8 @@
 import os
 import signal
 
+from neptune.new.envs import NEPTUNE_SUBPROCESS_KILL_TIMEOUT
+
 try:
     import psutil
 
@@ -25,22 +27,24 @@
     PSUTIL_INSTALLED = False
 
 
-KILL_TIMEOUT = 5
+KILL_TIMEOUT = int(os.getenv(NEPTUNE_SUBPROCESS_KILL_TIMEOUT, "5"))
 
 
 def kill_me():
     if PSUTIL_INSTALLED:
         process = psutil.Process(os.getpid())
         try:
-            children = _get_process_children(process) + [process]
+            children = _get_process_children(process)
         except psutil.NoSuchProcess:
             children = []
 
-        for process in children:
-            _terminate(process)
+        for child_proc in children:
+            _terminate(child_proc)
         _, alive = psutil.wait_procs(children, timeout=KILL_TIMEOUT)
-        for process in alive:
-            _kill(process)
+        for child_proc in alive:
+            _kill(child_proc)
+        # finish with terminating self
+        _terminate(process)
     else:
         os.kill(os.getpid(), signal.SIGINT)
 