-
Notifications
You must be signed in to change notification settings - Fork 199
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Drain workers as walltime expiry nears (#3063)
This is an implementation of the first part (the "easy part") of issue #3059. It adds a parameter to HighThroughputExecutor specifying a "not quite as long as walltime" period, after which workers will drain themselves: they will continue with existing tasks but ask the interchange not to send any more. Once they are drained, the worker pools will exit immediately.
- Loading branch information
1 parent
822f060
commit 920852f
Showing
5 changed files
with
155 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
import parsl | ||
import pytest | ||
import time | ||
|
||
from parsl.providers import LocalProvider | ||
from parsl.channels import LocalChannel | ||
from parsl.launchers import SimpleLauncher | ||
|
||
from parsl.config import Config | ||
from parsl.executors import HighThroughputExecutor | ||
|
||
# This constant scales the durations in this test that are based | ||
# on the expected drain period: the drain period is TIME_CONST | ||
# seconds, and the single executed task lasts twice that many | ||
# seconds. | ||
TIME_CONST = 1 | ||
|
||
|
||
def local_config():
    """Return the parsl ``Config`` used by the drain test.

    The configuration holds a single ``HighThroughputExecutor`` labelled
    ``htex_local`` whose ``drain_period`` is ``TIME_CONST`` seconds. It is
    backed by a ``LocalProvider`` that starts with one block, and the
    scaling strategy is set to ``'none'``.
    """
    # Provider: one initial block, launched through SimpleLauncher over a
    # local channel.
    provider = LocalProvider(
        channel=LocalChannel(),
        init_blocks=1,
        min_blocks=0,
        max_blocks=0,
        launcher=SimpleLauncher(),
    )

    # Executor under test; drain_period is the setting this test exercises.
    executor = HighThroughputExecutor(
        label="htex_local",
        drain_period=TIME_CONST,
        worker_debug=True,
        cores_per_worker=1,
        encrypted=True,
        provider=provider,
    )

    return Config(executors=[executor], strategy='none')
|
||
|
||
@parsl.python_app
def f(n):
    """Sleep for ``n`` seconds; used as a long-running task for the test."""
    # The import is kept inside the function body, as in the original
    # (presumably because the app body executes on a worker -- confirm
    # against the parsl app documentation).
    from time import sleep
    sleep(n)
|
||
|
||
@pytest.mark.local
def test_drain(try_assert):
    """Check that a manager drains after the drain period and then exits.

    The test waits for one manager to connect, verifies that it is active
    and not yet draining, submits a task lasting ``2 * TIME_CONST`` seconds,
    and then checks that:

    * the manager is reported as draining shortly after ``TIME_CONST``
      seconds, while the task is still running;
    * once the task has finished, the manager disappears from the list of
      connected managers.
    """
    executor = parsl.dfk().executors['htex_local']

    def exactly_one_manager():
        return len(executor.connected_managers()) == 1

    def first_manager_is_draining():
        return executor.connected_managers()[0]['draining']

    def no_managers_left():
        return len(executor.connected_managers()) == 0

    # Wait until a block is up and its manager has connected.
    try_assert(exactly_one_manager)

    first_manager = executor.connected_managers()[0]
    assert first_manager['active'], "The manager should be active"
    assert not first_manager['draining'], "The manager should not be draining"

    # Launch a task that outlasts the drain period.
    task_future = f(TIME_CONST * 2)

    time.sleep(TIME_CONST)

    # After the delay above, the draining flag should show up *very fast*.
    try_assert(first_manager_is_draining, timeout_ms=500)

    # The task is expected to still be in progress at this point.
    assert not task_future.done(), "The test task should still be running"

    task_future.result()

    # Now the manager should disappear. With strategy='none' this should be
    # caused by draining, although that cannot be told directly from the
    # manager merely being absent from connected_managers(). As with the
    # draining check above, this should happen very fast once the task ends.
    try_assert(no_managers_left, timeout_ms=500)