Skip to content

Commit

Permalink
Implement the PMIX_SPAWN_CHILD_SEP support
Browse files Browse the repository at this point in the history
Provide an optional way to determine the fate of
child jobs if/when the parent job terminates. In
the current implementation, we do not support
continuation of child jobs after the parent job
terminates abnormally - the parent must terminate
normally.

Users can toggle the behavior by providing
the PMIX_SPAWN_CHILD_SEP attribute in their
job info passed to PMIx_Spawn. In the absence
of that attribute, we default to allowing the
child to continue executing.

Provide an output when child jobs are terminated
warning the user that this has happened, and why.

Signed-off-by: Ralph Castain <[email protected]>
(cherry picked from commit 747da04)
  • Loading branch information
rhc54 committed Oct 18, 2024
1 parent 217763c commit ea07a62
Show file tree
Hide file tree
Showing 6 changed files with 70 additions and 12 deletions.
17 changes: 17 additions & 0 deletions src/docs/show-help-files/help-state-base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,20 @@ behavior:

You must specify one of the above in combination with NOTIFYERRORS in order
to receive notifications of errors. Please correct the situation and try again.
#
[child-term]
At least one child job is being terminated due to termination of
its parent:

Parent: %s
Child: %s

This behavior is controlled by setting the PMIX_SPAWN_CHILD_SEP attribute
in the job info provided at time of spawn for the child job. When set to
"true", the runtime will "separate" the child from its parent and allow
it to continue execution after parent termination. Note that this applies
only when the parent terminates normally - abnormal termination of the
parent will always result in a complete teardown of all child jobs.

In the absence of the attribute, the runtime will default to the "true"
behavior.
19 changes: 18 additions & 1 deletion src/mca/state/base/help-state-base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Copyright (c) 2018 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# Copyright (c) 2019 Intel, Inc. All rights reserved.
# Copyright (c) 2022 Nanook Consulting. All rights reserved.
# Copyright (c) 2022-2024 Nanook Consulting All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down Expand Up @@ -40,3 +40,20 @@ behavior:

You must specify one of the above in combination with NOTIFYERRORS in order
to receive notifications of errors. Please correct the situation and try again.
#
[child-term]
At least one child job is being terminated due to termination of
its parent:

Parent: %s
Child: %s

This behavior is controlled by setting the PMIX_SPAWN_CHILD_SEP attribute
in the job info provided at time of spawn for the child job. When set to
"true", the runtime will "separate" the child from its parent and allow
it to continue execution after parent termination. Note that this applies
only when the parent terminates normally - abnormal termination of the
parent will always result in a complete teardown of all child jobs.

In the absence of the attribute, the runtime will default to the "true"
behavior.
35 changes: 24 additions & 11 deletions src/mca/state/dvm/state_dvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "src/util/pmix_output.h"
#include "src/util/proc_info.h"
#include "src/util/session_dir.h"
#include "src/util/pmix_show_help.h"

#include "src/mca/errmgr/errmgr.h"
#include "src/mca/filem/filem.h"
Expand Down Expand Up @@ -505,7 +506,7 @@ static void check_complete(int fd, short args, void *cbdata)
prte_state_caddy_t *caddy = (prte_state_caddy_t *) cbdata;
prte_job_t *jdata, *jptr;
prte_proc_t *proc;
int i, rc;
int i, rc, nprocs;
prte_node_t *node;
prte_job_map_t *map;
int32_t index;
Expand All @@ -519,7 +520,7 @@ static void check_complete(int fd, short args, void *cbdata)
hwloc_obj_t obj;
hwloc_obj_type_t type;
hwloc_cpuset_t boundcpus, tgt;
bool takeall;
bool takeall, sep, *sepptr = &sep;
PRTE_HIDE_UNUSED_PARAMS(fd, args);

PMIX_ACQUIRE_OBJECT(caddy);
Expand Down Expand Up @@ -815,21 +816,33 @@ static void check_complete(int fd, short args, void *cbdata)
prte_state_base_check_fds(jdata);
}

/* if this job was a launcher, then we need to abort all of its
* child jobs that might still be running */
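/* if this job started child jobs, then we need to abort those of its
* child jobs that might still be running and were explicitly marked
* (PRTE_JOB_CHILD_SEP present and false) as NOT separable from their
* parent - children without the attribute are left to keep running */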
if (0 < pmix_list_get_size(&jdata->children)) {
PMIX_CONSTRUCT(&procs, pmix_pointer_array_t);
pmix_pointer_array_init(&procs, 1, INT_MAX, 1);
nprocs = 0;
PMIX_LIST_FOREACH(jptr, &jdata->children, prte_job_t)
{
proc = PMIX_NEW(prte_proc_t);
PMIX_LOAD_PROCID(&proc->name, jptr->nspace, PMIX_RANK_WILDCARD);
pmix_pointer_array_add(&procs, proc);
if (prte_get_attribute(&jptr->attributes, PRTE_JOB_CHILD_SEP, (void**)&sepptr, PMIX_BOOL) && !sep) {
proc = PMIX_NEW(prte_proc_t);
PMIX_LOAD_PROCID(&proc->name, jptr->nspace, PMIX_RANK_WILDCARD);
pmix_pointer_array_add(&procs, proc);
++nprocs;
if (1 == nprocs) {
// output a warning message that at least one child is being terminated
pmix_show_help("help-state-base.txt", "child-term", true,
jdata->nspace, jptr->nspace);
}
}
}
prte_plm.terminate_procs(&procs);
for (i = 0; i < procs.size; i++) {
if (NULL != (proc = (prte_proc_t *) pmix_pointer_array_get_item(&procs, i))) {
PMIX_RELEASE(proc);
if (0 < nprocs) {
prte_plm.terminate_procs(&procs);
for (i = 0; i < procs.size; i++) {
if (NULL != (proc = (prte_proc_t *) pmix_pointer_array_get_item(&procs, i))) {
PMIX_RELEASE(proc);
}
}
}
PMIX_DESTRUCT(&procs);
Expand Down
6 changes: 6 additions & 0 deletions src/prted/pmix/pmix_server_dyn.c
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,12 @@ static void interim(int sd, short args, void *cbdata)
prte_set_attribute(&jdata->attributes, PRTE_JOB_CONTINUOUS, PRTE_ATTR_GLOBAL,
&flag, PMIX_BOOL);

/*** CHILD INDEPENDENCE ***/
} else if (PMIX_CHECK_KEY(info, PMIX_SPAWN_CHILD_SEP)) {
flag = PMIX_INFO_TRUE(info);
prte_set_attribute(&jdata->attributes, PRTE_JOB_CHILD_SEP, PRTE_ATTR_GLOBAL,
&flag, PMIX_BOOL);

/*** MAX RESTARTS ***/
} else if (PMIX_CHECK_KEY(info, PMIX_MAX_RESTARTS)) {
for (i = 0; i < jdata->apps->size; i++) {
Expand Down
2 changes: 2 additions & 0 deletions src/util/attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,8 @@ const char *prte_attr_key_to_str(prte_attribute_key_t key)
return "DISPLAY PARSEABLE OUTPUT";
case PRTE_JOB_EXTEND_DVM:
return "EXTEND DVM";
case PRTE_JOB_CHILD_SEP:
return "CHILD SEP";

case PRTE_PROC_NOBARRIER:
return "PROC-NOBARRIER";
Expand Down
3 changes: 3 additions & 0 deletions src/util/attr.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@ typedef uint16_t prte_job_flags_t;
// are to be displayed
#define PRTE_JOB_DISPLAY_PARSEABLE_OUTPUT (PRTE_JOB_START_KEY + 110) // bool - display output in machine parsable format
#define PRTE_JOB_EXTEND_DVM (PRTE_JOB_START_KEY + 111) // bool - DVM is being extended
#define PRTE_JOB_CHILD_SEP (PRTE_JOB_START_KEY + 116) // bool - child job is to be considered independent
// from its parent, do not terminate if
// parent dies first

#define PRTE_JOB_MAX_KEY (PRTE_JOB_START_KEY + 200)

Expand Down

0 comments on commit ea07a62

Please sign in to comment.