Skip to content

Commit

Permalink
Put a cap on the size of single preadv in restore operation.
Browse files Browse the repository at this point in the history
While each preadv() is followed by a fallocate() that removes the data
range from the image files on tmpfs, the same data temporarily exists in
two places (between preadv() and fallocate()); this increases the memory
overhead of the restore operation by the size of a single preadv().
An uncapped preadv() would read up to 2 GiB of data, so we limit each
read to a smaller block size (128 MiB).

Based-on-work-by: Paweł Stradomski <[email protected]>
Signed-off-by: Michał Mirosław <[email protected]>
  • Loading branch information
osctobe authored and avagin committed Aug 21, 2023
1 parent b5c3ccc commit 5fedcaa
Showing 1 changed file with 46 additions and 1 deletion.
47 changes: 46 additions & 1 deletion criu/pie/restorer.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@
*/
#define MAX_GETGROUPS_CHECKED (512 / sizeof(unsigned int))

/*
 * Memory overhead limit for reading VMA when auto_dedup is enabled:
 * the maximum number of bytes requested by a single preadv() call in
 * that mode. The value is 128 << 20 bytes, i.e. 128 MiB.
 * An arbitrarily chosen trade-off point between speed and memory usage.
 */
#define AUTO_DEDUP_OVERHEAD_BYTES (128 << 20)

#ifndef PR_SET_PDEATHSIG
#define PR_SET_PDEATHSIG 1
#endif
Expand Down Expand Up @@ -1477,6 +1483,40 @@ static int fd_poll(int inotify_fd)
return sys_ppoll(&pfd, 1, &tmo, NULL, sizeof(sigset_t));
}

/*
 * preadv() wrapper that caps how many bytes a single call may request.
 *
 * @fd:          descriptor handed to sys_preadv()
 * @iovs:        iovec array; at most one entry is shortened for the
 *               duration of the call and put back before returning
 * @nr:          number of entries in @iovs
 * @offs:        file offset handed to sys_preadv()
 * @max_to_read: byte budget for this call; zero means "no limit"
 *
 * Returns whatever sys_preadv() returned.
 */
static ssize_t preadv_limited(int fd, struct iovec *iovs, int nr, off_t offs, size_t max_to_read)
{
	size_t budget = max_to_read;
	size_t full_len = 0;
	int clipped = -1; /* index of the shortened iovec, -1 if none */
	int count = nr;
	int idx = 0;
	ssize_t ret;

	if (max_to_read != 0) {
		/* Consume every leading iovec that fits into the budget whole. */
		while (idx < nr && iovs[idx].iov_len <= budget) {
			budget -= iovs[idx].iov_len;
			idx++;
		}

		if (idx < nr) {
			if (budget == 0) {
				/* Budget used up exactly: stop before this iovec. */
				count = idx;
			} else {
				/* Partially fits: shorten it to the remaining budget. */
				clipped = idx;
				full_len = iovs[idx].iov_len;
				iovs[idx].iov_len = budget;
				count = idx + 1;
			}
		}
	}

	ret = sys_preadv(fd, iovs, count, offs);

	/* Undo the temporary truncation so the caller sees its iovec intact. */
	if (clipped >= 0)
		iovs[clipped].iov_len = full_len;

	return ret;
}

/*
* In the worst case buf size should be:
* sizeof(struct inotify_event) * 2 + PATH_MAX
Expand Down Expand Up @@ -1748,7 +1788,12 @@ long __export_restore_task(struct task_restore_args *args)

while (nr) {
pr_debug("Preadv %lx:%d... (%d iovs)\n", (unsigned long)iovs->iov_base, (int)iovs->iov_len, nr);
r = sys_preadv(args->vma_ios_fd, iovs, nr, rio->off);
/*
* If we're requested to punch holes in the file after reading we do
* it to save memory. Limit the reads then to an arbitrary block size.
*/
r = preadv_limited(args->vma_ios_fd, iovs, nr, rio->off,
args->auto_dedup ? AUTO_DEDUP_OVERHEAD_BYTES : 0);
if (r < 0) {
pr_err("Can't read pages data (%d)\n", (int)r);
goto core_restore_end;
Expand Down

0 comments on commit 5fedcaa

Please sign in to comment.