Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

seize: enable support for frozen containers #2514

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions criu/fault-injection.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ int fault_injection_init(void)
fi_strategy = start;

switch (fi_strategy) {
case FI_DISABLE_FREEZE_CGROUP:
dont_use_freeze_cgroup();
case FI_COMPEL_INTERRUPT_ONLY_MODE:
set_compel_interrupt_only_mode();
break;
default:
break;
Expand Down
2 changes: 1 addition & 1 deletion criu/include/fault-injection.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ enum faults {
FI_CORRUPT_EXTREGS = 134,
FI_DONT_USE_PAGEMAP_SCAN = 135,
FI_DUMP_CRASH = 136,
FI_DISABLE_FREEZE_CGROUP = 137,
FI_COMPEL_INTERRUPT_ONLY_MODE = 137,
FI_PLUGIN_CUDA_FORCE_ENABLE = 138,
FI_MAX,
};
Expand Down
2 changes: 1 addition & 1 deletion criu/include/seize.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ extern bool alarm_timeouted(void);

extern char *task_comm_info(pid_t pid, char *comm, size_t size);
extern char *__task_comm_info(pid_t pid);
extern void dont_use_freeze_cgroup(void);
extern void set_compel_interrupt_only_mode(void);

#endif
46 changes: 26 additions & 20 deletions criu/seize.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,17 @@
#include "xmalloc.h"
#include "util.h"

static bool freeze_cgroup_disabled;
static bool compel_interrupt_only_mode;

/*
* Disables the use of freeze cgroups for process seizing, even if explicitly
* requested via the --freeze-cgroup option. This is necessary for plugins
* (e.g., CUDA) that do not function correctly when processes are frozen using
* cgroups.
* requested via the --freeze-cgroup option or if the cgroup is already frozen.
* This is necessary for plugins (e.g., CUDA) that do not function correctly
* when processes are frozen using cgroups.
*/
void __attribute__((used)) dont_use_freeze_cgroup(void)
void __attribute__((used)) set_compel_interrupt_only_mode(void)
{
freeze_cgroup_disabled = true;
compel_interrupt_only_mode = true;
}

char *task_comm_info(pid_t pid, char *comm, size_t size)
Expand Down Expand Up @@ -410,7 +410,7 @@ static int freezer_detach(void)
{
int i;

if (!opts.freeze_cgroup || freeze_cgroup_disabled)
if (!opts.freeze_cgroup || compel_interrupt_only_mode)
return 0;

for (i = 0; i < processes_to_wait && processes_to_wait_pids; i++) {
Expand Down Expand Up @@ -505,29 +505,35 @@ static int log_unfrozen_stacks(char *root)
return 0;
}

static int check_freezer_cgroup(void)
static int prepare_freezer_for_interrupt_only_mode(void)
{
enum freezer_state state = THAWED;
int fd;
int exit_code = -1;

BUG_ON(!freeze_cgroup_disabled);
BUG_ON(!compel_interrupt_only_mode);

fd = freezer_open();
if (fd < 0)
return -1;

state = get_freezer_state(fd);
close(fd);
if (state == FREEZER_ERROR) {
return -1;
goto err;
}

origin_freezer_state = state == FREEZING ? FROZEN : state;
avagin marked this conversation as resolved.
Show resolved Hide resolved

if (state != THAWED) {
pr_err("One or more plugins are incompatible with the freezer cgroup in the FROZEN state.\n");
return -1;
pr_warn("unfreezing cgroup for plugin compatibility\n");
if (freezer_write_state(fd, THAWED))
goto err;
}

return 0;
exit_code = 0;
err:
close(fd);
return exit_code;
}

static int freeze_processes(void)
Expand Down Expand Up @@ -681,7 +687,7 @@ static int collect_children(struct pstree_item *item)
goto free;
}

if (!opts.freeze_cgroup || freeze_cgroup_disabled)
if (!opts.freeze_cgroup || compel_interrupt_only_mode)
/* fails when meets a zombie */
__ignore_value(compel_interrupt_task(pid));

Expand Down Expand Up @@ -869,7 +875,7 @@ static int collect_threads(struct pstree_item *item)

pr_info("\tSeizing %d's %d thread\n", item->pid->real, pid);

if ((!opts.freeze_cgroup || freeze_cgroup_disabled) &&
if ((!opts.freeze_cgroup || compel_interrupt_only_mode) &&
compel_interrupt_task(pid))
continue;

Expand Down Expand Up @@ -926,7 +932,7 @@ static int collect_loop(struct pstree_item *item, int (*collect)(struct pstree_i
{
int attempts = NR_ATTEMPTS, nr_inprogress = 1;

if (opts.freeze_cgroup && !freeze_cgroup_disabled)
if (opts.freeze_cgroup && !compel_interrupt_only_mode)
attempts = 1;

/*
Expand Down Expand Up @@ -1032,11 +1038,11 @@ int collect_pstree(void)

pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1);

if (opts.freeze_cgroup && !freeze_cgroup_disabled) {
if (opts.freeze_cgroup && !compel_interrupt_only_mode) {
if (freeze_processes())
goto err;
} else {
if (opts.freeze_cgroup && check_freezer_cgroup())
if (opts.freeze_cgroup && prepare_freezer_for_interrupt_only_mode())
goto err;
if (compel_interrupt_task(pid)) {
set_cr_errno(ESRCH);
Expand Down Expand Up @@ -1067,7 +1073,7 @@ int collect_pstree(void)
if (ret < 0)
goto err;

if (opts.freeze_cgroup && !freeze_cgroup_disabled &&
if (opts.freeze_cgroup && !compel_interrupt_only_mode &&
freezer_wait_processes()) {
goto err;
}
Expand Down
2 changes: 1 addition & 1 deletion plugins/cuda/cuda_plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ int cuda_plugin_init(int stage)
INIT_LIST_HEAD(&cuda_pids);
}

dont_use_freeze_cgroup();
set_compel_interrupt_only_mode();

return 0;
}
Expand Down
4 changes: 3 additions & 1 deletion test/jenkins/criu-fault.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ fi
# also check for the main thread corruption
./test/zdtm.py run -t zdtm/static/fpu00 --fault 134 -f h --norst || fail

# check dont_use_freeze_cgroup
# check set_compel_interrupt_only_mode
./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137
rst0git marked this conversation as resolved.
Show resolved Hide resolved
./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 --norst
# check set_compel_interrupt_only_mode when the test cgroup is already frozen
./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:f --fault 137

if ./test/zdtm.py run -t zdtm/static/vfork00 --fault 136 --report report -f h ; then
fail
Expand Down
Loading