From c204cf2d66f0b56821489d84aa3b922f80e36b40 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Mon, 4 Nov 2024 19:57:30 +0000 Subject: [PATCH 1/2] seize: enable support for frozen containers Container runtimes like CRI-O and containerd utilize the freezer cgroup to create a consistent snapshot of container root filesystem (rootfs) changes. In this case, the container is frozen before invoking CRIU. After CRIU successfully completes, a copy of the container rootfs diff is saved, and the container is then unfrozen. However, the `cuda-checkpoint` tool is not able to perform a 'lock' action on frozen threads. To support GPU checkpointing with these container runtimes, we need to unfreeze the cgroup and return it to its original state once the checkpointing is complete. To reflect this new behavior, the following changes are applied: - `dont_use_freeze_cgroup(void)` -> `set_compel_interrupt_only_mode(void)` - `bool freeze_cgroup_disabled` -> `bool compel_interrupt_only_mode` - `check_freezer_cgroup(void)` -> `prepare_freezer_for_interrupt_only_mode(void)` Note that when `compel_interrupt_only_mode` is set to `true`, `compel_interrupt_task()` is used instead of `freeze_processes()` to prevent tasks from running during `criu dump`. Fixes: #2508 Signed-off-by: Radostin Stoyanov --- criu/fault-injection.c | 4 +-- criu/include/fault-injection.h | 2 +- criu/include/seize.h | 2 +- criu/seize.c | 46 +++++++++++++++++++--------------- plugins/cuda/cuda_plugin.c | 2 +- test/jenkins/criu-fault.sh | 2 +- 6 files changed, 32 insertions(+), 26 deletions(-) diff --git a/criu/fault-injection.c b/criu/fault-injection.c index 2272e6d842..5dd9acf601 100644 --- a/criu/fault-injection.c +++ b/criu/fault-injection.c @@ -24,8 +24,8 @@ int fault_injection_init(void) fi_strategy = start; switch (fi_strategy) { - case FI_DISABLE_FREEZE_CGROUP: - dont_use_freeze_cgroup(); + case FI_COMPEL_INTERRUPT_ONLY_MODE: + set_compel_interrupt_only_mode(); break; default: break; diff --git a/criu/include/fault-injection.h b/criu/include/fault-injection.h index 59adf05b9e..e987c18ce3 100644 --- a/criu/include/fault-injection.h +++ b/criu/include/fault-injection.h @@ -21,7 +21,7 @@ enum faults { FI_CORRUPT_EXTREGS = 134, FI_DONT_USE_PAGEMAP_SCAN = 135, FI_DUMP_CRASH = 136, - FI_DISABLE_FREEZE_CGROUP = 137, + FI_COMPEL_INTERRUPT_ONLY_MODE = 137, FI_PLUGIN_CUDA_FORCE_ENABLE = 138, FI_MAX, }; diff --git a/criu/include/seize.h b/criu/include/seize.h index f5ea76b16c..64e8d2d12f 100644 --- a/criu/include/seize.h +++ b/criu/include/seize.h @@ -9,6 +9,6 @@ extern bool alarm_timeouted(void); extern char *task_comm_info(pid_t pid, char *comm, size_t size); extern char *__task_comm_info(pid_t pid); -extern void dont_use_freeze_cgroup(void); +extern void set_compel_interrupt_only_mode(void); #endif diff --git a/criu/seize.c b/criu/seize.c index ab394f9ca5..9bd1832d9b 100644 --- a/criu/seize.c +++ b/criu/seize.c @@ -25,17 +25,17 @@ #include "xmalloc.h" #include "util.h" -static bool freeze_cgroup_disabled; +static bool compel_interrupt_only_mode; /* * Disables the use of freeze cgroups for process seizing, even if explicitly - * requested via the --freeze-cgroup option. This is necessary for plugins - * (e.g., CUDA) that do not function correctly when processes are frozen using - * cgroups. + * requested via the --freeze-cgroup option or already set in a frozen state. + * This is necessary for plugins (e.g., CUDA) that do not function correctly + * when processes are frozen using cgroups. */ -void __attribute__((used)) dont_use_freeze_cgroup(void) +void __attribute__((used)) set_compel_interrupt_only_mode(void) { - freeze_cgroup_disabled = true; + compel_interrupt_only_mode = true; } char *task_comm_info(pid_t pid, char *comm, size_t size) @@ -410,7 +410,7 @@ static int freezer_detach(void) { int i; - if (!opts.freeze_cgroup || freeze_cgroup_disabled) + if (!opts.freeze_cgroup || compel_interrupt_only_mode) return 0; for (i = 0; i < processes_to_wait && processes_to_wait_pids; i++) { @@ -505,29 +505,35 @@ static int log_unfrozen_stacks(char *root) return 0; } -static int check_freezer_cgroup(void) +static int prepare_freezer_for_interrupt_only_mode(void) { enum freezer_state state = THAWED; int fd; + int exit_code = -1; - BUG_ON(!freeze_cgroup_disabled); + BUG_ON(!compel_interrupt_only_mode); fd = freezer_open(); if (fd < 0) return -1; state = get_freezer_state(fd); - close(fd); if (state == FREEZER_ERROR) { - return -1; + goto err; } + origin_freezer_state = state == FREEZING ? FROZEN : state; + if (state != THAWED) { - pr_err("One or more plugins are incompatible with the freezer cgroup in the FROZEN state.\n"); - return -1; + pr_warn("unfreezing cgroup for plugin compatibility\n"); + if (freezer_write_state(fd, THAWED)) + goto err; } - return 0; + exit_code = 0; +err: + close(fd); + return exit_code; } static int freeze_processes(void) @@ -681,7 +687,7 @@ static int collect_children(struct pstree_item *item) goto free; } - if (!opts.freeze_cgroup || freeze_cgroup_disabled) + if (!opts.freeze_cgroup || compel_interrupt_only_mode) /* fails when meets a zombie */ __ignore_value(compel_interrupt_task(pid)); @@ -869,7 +875,7 @@ static int collect_threads(struct pstree_item *item) pr_info("\tSeizing %d's %d thread\n", item->pid->real, pid); - if ((!opts.freeze_cgroup || freeze_cgroup_disabled) && + if ((!opts.freeze_cgroup || compel_interrupt_only_mode) && compel_interrupt_task(pid)) continue; @@ -926,7 +932,7 @@ static int collect_loop(struct pstree_item *item, int (*collect)(struct pstree_i { int attempts = NR_ATTEMPTS, nr_inprogress = 1; - if (opts.freeze_cgroup && !freeze_cgroup_disabled) + if (opts.freeze_cgroup && !compel_interrupt_only_mode) attempts = 1; /* @@ -1032,11 +1038,11 @@ int collect_pstree(void) pr_debug("Detected cgroup V%d freezer\n", cgroup_v2 ? 2 : 1); - if (opts.freeze_cgroup && !freeze_cgroup_disabled) { + if (opts.freeze_cgroup && !compel_interrupt_only_mode) { if (freeze_processes()) goto err; } else { - if (opts.freeze_cgroup && check_freezer_cgroup()) + if (opts.freeze_cgroup && prepare_freezer_for_interrupt_only_mode()) goto err; if (compel_interrupt_task(pid)) { set_cr_errno(ESRCH); @@ -1067,7 +1073,7 @@ int collect_pstree(void) if (ret < 0) goto err; - if (opts.freeze_cgroup && !freeze_cgroup_disabled && + if (opts.freeze_cgroup && !compel_interrupt_only_mode && freezer_wait_processes()) { goto err; } diff --git a/plugins/cuda/cuda_plugin.c b/plugins/cuda/cuda_plugin.c index c4fc67fa9f..3d624750e6 100644 --- a/plugins/cuda/cuda_plugin.c +++ b/plugins/cuda/cuda_plugin.c @@ -509,7 +509,7 @@ int cuda_plugin_init(int stage) INIT_LIST_HEAD(&cuda_pids); } - dont_use_freeze_cgroup(); + set_compel_interrupt_only_mode(); return 0; } diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index fc0eddc2b2..8cb71d8ca7 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -40,7 +40,7 @@ fi # also check for the main thread corruption ./test/zdtm.py run -t zdtm/static/fpu00 --fault 134 -f h --norst || fail -# check dont_use_freeze_cgroup +# check set_compel_interrupt_only_mode ./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 ./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 --norst From 495e39e628a2859c7f4900c670034b9f9c2a5b24 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 8 Nov 2024 13:41:20 +0000 Subject: [PATCH 2/2] ci: test interrupt-only mode with frozen cgroup Signed-off-by: Radostin Stoyanov --- test/jenkins/criu-fault.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/jenkins/criu-fault.sh b/test/jenkins/criu-fault.sh index 8cb71d8ca7..6ee7ce33a8 100755 --- a/test/jenkins/criu-fault.sh +++ b/test/jenkins/criu-fault.sh @@ -43,6 +43,8 @@ fi # check set_compel_interrupt_only_mode ./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 ./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:t --fault 137 --norst +# check set_compel_interrupt_only_mode when test cgroup is frozen +./test/zdtm.py run -t zdtm/static/env00 --freezecg zdtm:f --fault 137 if ./test/zdtm.py run -t zdtm/static/vfork00 --fault 136 --report report -f h ; then fail