Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

criu: Allow disabling freeze cgroups #2475

Merged
merged 2 commits into from
Sep 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions criu/fault-injection.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <stdlib.h>
#include "criu-log.h"
#include "fault-injection.h"
#include "seize.h"

enum faults fi_strategy;

Expand All @@ -21,5 +22,13 @@ int fault_injection_init(void)
}

fi_strategy = start;

switch (fi_strategy) {
case FI_DISABLE_FREEZE_CGROUP:
dont_use_freeze_cgroup();
break;
default:
break;
};
Dismissed Show dismissed Hide dismissed
return 0;
}
1 change: 1 addition & 0 deletions criu/include/fault-injection.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ enum faults {
FI_CORRUPT_EXTREGS = 134,
FI_DONT_USE_PAGEMAP_SCAN = 135,
FI_DUMP_CRASH = 136,
FI_DISABLE_FREEZE_CGROUP = 137,
FI_MAX,
};

Expand Down
2 changes: 2 additions & 0 deletions criu/include/seize.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
#define __CR_SEIZE_H__

extern int collect_pstree(void);
struct pstree_item;
extern void pstree_switch_state(struct pstree_item *root_item, int st);
extern const char *get_real_freezer_state(void);
extern bool alarm_timeouted(void);

extern char *task_comm_info(pid_t pid, char *comm, size_t size);
extern char *__task_comm_info(pid_t pid);
extern void dont_use_freeze_cgroup(void);

#endif
66 changes: 55 additions & 11 deletions criu/seize.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,19 @@
#include "xmalloc.h"
#include "util.h"

/* Raised by dont_use_freeze_cgroup(); checked alongside opts.freeze_cgroup. */
static bool freeze_cgroup_disabled;

/*
 * Opt out of freezer-cgroup based seizing.
 *
 * After this call the freeze cgroup is not used to stop tasks, even when the
 * user explicitly passed --freeze-cgroup. Some plugins (e.g., CUDA) break
 * when their processes are frozen through cgroups, so they call this.
 */
void __attribute__((used)) dont_use_freeze_cgroup(void)
{
	freeze_cgroup_disabled = true;
}

char *task_comm_info(pid_t pid, char *comm, size_t size)
{
bool is_read = false;
Expand Down Expand Up @@ -397,7 +410,7 @@ static int freezer_detach(void)
{
int i;

if (!opts.freeze_cgroup)
if (!opts.freeze_cgroup || freeze_cgroup_disabled)
return 0;

for (i = 0; i < processes_to_wait && processes_to_wait_pids; i++) {
Expand Down Expand Up @@ -492,6 +505,31 @@ static int log_unfrozen_stacks(char *root)
return 0;
}

static int check_freezer_cgroup(void)
{
enum freezer_state state = THAWED;
int fd;

BUG_ON(!freeze_cgroup_disabled);

fd = freezer_open();
if (fd < 0)
return -1;

state = get_freezer_state(fd);
close(fd);
if (state == FREEZER_ERROR) {
return -1;
}

if (state != THAWED) {
pr_err("One or more plugins are incompatible with the freezer cgroup in the FROZEN state.\n");
return -1;
}

return 0;
}

static int freeze_processes(void)
{
int fd, exit_code = -1;
Expand Down Expand Up @@ -643,7 +681,7 @@ static int collect_children(struct pstree_item *item)
goto free;
}

if (!opts.freeze_cgroup)
if (!opts.freeze_cgroup || freeze_cgroup_disabled)
/* fails when meets a zombie */
__ignore_value(compel_interrupt_task(pid));

Expand Down Expand Up @@ -831,7 +869,8 @@ static int collect_threads(struct pstree_item *item)

pr_info("\tSeizing %d's %d thread\n", item->pid->real, pid);

if (!opts.freeze_cgroup && compel_interrupt_task(pid))
if ((!opts.freeze_cgroup || freeze_cgroup_disabled) &&
compel_interrupt_task(pid))
continue;

ret = compel_wait_task(pid, item_ppid(item), parse_pid_status, NULL, &t_creds.s, NULL);
Expand Down Expand Up @@ -887,7 +926,7 @@ static int collect_loop(struct pstree_item *item, int (*collect)(struct pstree_i
{
int attempts = NR_ATTEMPTS, nr_inprogress = 1;

if (opts.freeze_cgroup)
if (opts.freeze_cgroup && !freeze_cgroup_disabled)
attempts = 1;

/*
Expand Down Expand Up @@ -993,12 +1032,16 @@ int collect_pstree(void)

pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1);

if (opts.freeze_cgroup && freeze_processes())
goto err;

if (!opts.freeze_cgroup && compel_interrupt_task(pid)) {
set_cr_errno(ESRCH);
goto err;
if (opts.freeze_cgroup && !freeze_cgroup_disabled) {
if (freeze_processes())
goto err;
} else {
if (opts.freeze_cgroup && check_freezer_cgroup())
goto err;
if (compel_interrupt_task(pid)) {
avagin marked this conversation as resolved.
Show resolved Hide resolved
set_cr_errno(ESRCH);
goto err;
}
}

ret = compel_wait_task(pid, -1, parse_pid_status, NULL, &creds.s, NULL);
Expand All @@ -1024,7 +1067,8 @@ int collect_pstree(void)
if (ret < 0)
goto err;

if (opts.freeze_cgroup && freezer_wait_processes()) {
if (opts.freeze_cgroup && !freeze_cgroup_disabled &&
freezer_wait_processes()) {
ret = -1;
goto err;
}
Expand Down
2 changes: 2 additions & 0 deletions plugins/cuda/cuda_plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,8 @@ int cuda_plugin_init(int stage)
INIT_LIST_HEAD(&cuda_pids);
}

dont_use_freeze_cgroup();

return 0;
}

Expand Down
4 changes: 4 additions & 0 deletions test/jenkins/criu-fault.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ fi
# also check for the main thread corruption
./test/zdtm.py run -t zdtm/static/fpu00 --fault 134 -f h --norst || fail

# check dont_use_freeze_cgroup: fault 137 disables the freeze cgroup even
# though --freezecg is passed; route failures through fail like the tests above
./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 || fail
./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 --norst || fail

if ./test/zdtm.py run -t zdtm/static/vfork00 --fault 136 --report report -f h ; then
fail
fi
Loading