From f38507b6c090da1de113ae7c72e44aa0f716bde3 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 12 Apr 2024 15:18:12 +0200 Subject: [PATCH 01/17] lxcfs: use macro to generate liblxcfs call helpers Let's reduce code duplication by using macro for this. Signed-off-by: Alexander Mikhalitsyn --- src/lxcfs.c | 508 +++++++++------------------------------------------- 1 file changed, 88 insertions(+), 420 deletions(-) diff --git a/src/lxcfs.c b/src/lxcfs.c index c5eef200..54417df9 100644 --- a/src/lxcfs.c +++ b/src/lxcfs.c @@ -242,427 +242,95 @@ static void sigusr1_reload(int signo, siginfo_t *info, void *extra) } /* Functions to run the library methods */ -static int do_cg_getattr(const char *path, struct stat *sb) -{ - char *error; - int (*__cg_getattr)(const char *path, struct stat *sb); - - dlerror(); - __cg_getattr = (int (*)(const char *, struct stat *))dlsym(dlopen_handle, "cg_getattr"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_getattr()", error); - - return __cg_getattr(path, sb); -} - -static int do_proc_getattr(const char *path, struct stat *sb) -{ - char *error; - int (*__proc_getattr)(const char *path, struct stat *sb); - - dlerror(); - __proc_getattr = (int (*)(const char *, struct stat *)) dlsym(dlopen_handle, "proc_getattr"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find proc_getattr()", error); - - return __proc_getattr(path, sb); -} - -static int do_sys_getattr(const char *path, struct stat *sb) -{ - char *error; - int (*__sys_getattr)(const char *path, struct stat *sb); - - dlerror(); - __sys_getattr = (int (*)(const char *, struct stat *)) dlsym(dlopen_handle, "sys_getattr"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find sys_getattr()", error); - - return __sys_getattr(path, sb); -} - -static int do_cg_read(const char *path, char *buf, size_t size, off_t offset, - struct fuse_file_info *fi) -{ - char *error; - int (*__cg_read)(const char 
*path, char *buf, size_t size, off_t offset, - struct fuse_file_info *fi); - - dlerror(); - __cg_read = (int (*)(const char *, char *, size_t, off_t, struct fuse_file_info *))dlsym(dlopen_handle, "cg_read"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_read()", error); - - return __cg_read(path, buf, size, offset, fi); -} - -static int do_proc_read(const char *path, char *buf, size_t size, off_t offset, - struct fuse_file_info *fi) -{ - char *error; - int (*__proc_read)(const char *path, char *buf, size_t size, - off_t offset, struct fuse_file_info *fi); - - dlerror(); - __proc_read = (int (*)(const char *, char *, size_t, off_t, struct fuse_file_info *))dlsym(dlopen_handle, "proc_read"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find proc_read()", error); - - return __proc_read(path, buf, size, offset, fi); -} - -static int do_sys_read(const char *path, char *buf, size_t size, off_t offset, - struct fuse_file_info *fi) -{ - char *error; - int (*__sys_read)(const char *path, char *buf, size_t size, - off_t offset, struct fuse_file_info *fi); - - dlerror(); - __sys_read = (int (*)(const char *, char *, size_t, off_t, struct fuse_file_info *))dlsym(dlopen_handle, "sys_read"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find sys_read()", error); - - return __sys_read(path, buf, size, offset, fi); -} - -static int do_cg_write(const char *path, const char *buf, size_t size, - off_t offset, struct fuse_file_info *fi) -{ - char *error; - int (*__cg_write)(const char *path, const char *buf, size_t size, - off_t offset, struct fuse_file_info *fi); - - dlerror(); - __cg_write = (int (*)(const char *, const char *, size_t, off_t, struct fuse_file_info *))dlsym(dlopen_handle, "cg_write"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_write()", error); - - return __cg_write(path, buf, size, offset, fi); -} - -static int do_sys_write(const char 
*path, const char *buf, size_t size, - off_t offset, struct fuse_file_info *fi) -{ - char *error; - int (*__sys_write)(const char *path, const char *buf, size_t size, - off_t offset, struct fuse_file_info *fi); - dlerror(); - __sys_write = (int (*)(const char *, const char *, size_t, off_t, struct fuse_file_info *))dlsym(dlopen_handle, "sys_write"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find sys_write()", error); - - return __sys_write(path, buf, size, offset, fi); -} - -static int do_cg_mkdir(const char *path, mode_t mode) -{ - char *error; - int (*__cg_mkdir)(const char *path, mode_t mode); - - dlerror(); - __cg_mkdir = (int (*)(const char *, mode_t))dlsym(dlopen_handle, "cg_mkdir"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_mkdir()", error); - - return __cg_mkdir(path, mode); -} - -static int do_cg_chown(const char *path, uid_t uid, gid_t gid) -{ - char *error; - int (*__cg_chown)(const char *path, uid_t uid, gid_t gid); - - dlerror(); - __cg_chown = (int (*)(const char *, uid_t, gid_t))dlsym(dlopen_handle, "cg_chown"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_chown()", error); - - return __cg_chown(path, uid, gid); -} - -static int do_cg_rmdir(const char *path) -{ - char *error; - int (*__cg_rmdir)(const char *path); - - dlerror(); - __cg_rmdir = (int (*)(const char *path))dlsym(dlopen_handle, "cg_rmdir"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_rmdir()", error); - - return __cg_rmdir(path); -} - -static int do_cg_chmod(const char *path, mode_t mode) -{ - char *error; - int (*__cg_chmod)(const char *path, mode_t mode); - - dlerror(); - __cg_chmod = (int (*)(const char *, mode_t))dlsym(dlopen_handle, "cg_chmod"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_chmod()", error); - - return __cg_chmod(path, mode); -} - -static int do_cg_readdir(const char *path, void *buf, 
fuse_fill_dir_t filler, - off_t offset, struct fuse_file_info *fi) -{ - char *error; - int (*__cg_readdir)(const char *path, void *buf, fuse_fill_dir_t filler, - off_t offset, struct fuse_file_info *fi); - - dlerror(); - __cg_readdir = (int (*)(const char *, void *, fuse_fill_dir_t, off_t, struct fuse_file_info *))dlsym(dlopen_handle, "cg_readdir"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_readdir()", error); - - return __cg_readdir(path, buf, filler, offset, fi); -} - -static int do_proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, - off_t offset, struct fuse_file_info *fi) -{ - char *error; - int (*__proc_readdir)(const char *path, void *buf, fuse_fill_dir_t filler, - off_t offset, struct fuse_file_info *fi); - - dlerror(); - __proc_readdir = (int (*)(const char *, void *, fuse_fill_dir_t, off_t, struct fuse_file_info *))dlsym(dlopen_handle, "proc_readdir"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find proc_readdir()", error); - - return __proc_readdir(path, buf, filler, offset, fi); -} - -static int do_sys_readdir(const char *path, void *buf, fuse_fill_dir_t filler, - off_t offset, struct fuse_file_info *fi) -{ - char *error; - int (*__sys_readdir)(const char *path, void *buf, fuse_fill_dir_t filler, - off_t offset, struct fuse_file_info *fi); - - dlerror(); - __sys_readdir = (int (*)(const char *, void *, fuse_fill_dir_t, off_t, struct fuse_file_info *))dlsym(dlopen_handle, "sys_readdir"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find sys_readdir()", error); - - return __sys_readdir(path, buf, filler, offset, fi); -} - -static int do_sys_readlink(const char *path, char *buf, size_t size) -{ - char *error; - int (*__sys_readlink)(const char *path, char *buf, size_t size); - - dlerror(); - __sys_readlink = (int (*)(const char *, char *, size_t))dlsym(dlopen_handle, "sys_readlink"); - error = dlerror(); - if (error) - return log_error(-1, "%s - 
Failed to find sys_readlink()", error); - - return __sys_readlink(path, buf, size); -} - -static int do_cg_open(const char *path, struct fuse_file_info *fi) -{ - char *error; - int (*__cg_open)(const char *path, struct fuse_file_info *fi); - - dlerror(); - __cg_open = (int (*)(const char *, struct fuse_file_info *))dlsym(dlopen_handle, "cg_open"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_open()", error); - - return __cg_open(path, fi); -} - -static int do_cg_access(const char *path, int mode) -{ - char *error; - int (*__cg_access)(const char *path, int mode); - - dlerror(); - __cg_access = (int (*)(const char *, int mode))dlsym(dlopen_handle, "cg_access"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_access()", error); - - return __cg_access(path, mode); -} - -static int do_proc_open(const char *path, struct fuse_file_info *fi) -{ - char *error; - int (*__proc_open)(const char *path, struct fuse_file_info *fi); - - dlerror(); - __proc_open = (int (*)(const char *path, struct fuse_file_info *fi))dlsym(dlopen_handle, "proc_open"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find proc_open()", error); - - return __proc_open(path, fi); -} - -static int do_proc_access(const char *path, int mode) -{ - char *error; - int (*__proc_access)(const char *path, int mode); - - dlerror(); - __proc_access = (int (*)(const char *, int mode))dlsym(dlopen_handle, "proc_access"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find proc_access()", error); - - return __proc_access(path, mode); -} - -static int do_sys_open(const char *path, struct fuse_file_info *fi) -{ - char *error; - int (*__sys_open)(const char *path, struct fuse_file_info *fi); - - dlerror(); - __sys_open = (int (*)(const char *path, struct fuse_file_info *fi))dlsym(dlopen_handle, "sys_open"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find sys_open()", 
error); - - return __sys_open(path, fi); -} - -static int do_sys_opendir(const char *path, struct fuse_file_info *fi) -{ - char *error; - int (*__sys_opendir)(const char *path, struct fuse_file_info *fi); - - dlerror(); - __sys_opendir = (int (*)(const char *path, struct fuse_file_info *fi))dlsym(dlopen_handle, "sys_opendir"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find sys_opendir()", error); - - return __sys_opendir(path, fi); -} - -static int do_sys_access(const char *path, int mode) -{ - char *error; - int (*__sys_access)(const char *path, int mode); - - dlerror(); - __sys_access = (int (*)(const char *, int mode))dlsym(dlopen_handle, "sys_access"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find sys_access()", error); - - return __sys_access(path, mode); -} - -static int do_cg_release(const char *path, struct fuse_file_info *fi) -{ - char *error; - int (*__cg_release)(const char *path, struct fuse_file_info *fi); - - dlerror(); - __cg_release = (int (*)(const char *path, struct fuse_file_info *))dlsym(dlopen_handle, "cg_release"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_release()", error); - - return __cg_release(path, fi); -} - -static int do_proc_release(const char *path, struct fuse_file_info *fi) -{ - char *error; - int (*__proc_release)(const char *path, struct fuse_file_info *fi); - - dlerror(); - __proc_release = (int (*)(const char *path, struct fuse_file_info *)) dlsym(dlopen_handle, "proc_release"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find proc_release()", error); - - return __proc_release(path, fi); -} - -static int do_sys_release(const char *path, struct fuse_file_info *fi) -{ - char *error; - int (*__sys_release)(const char *path, struct fuse_file_info *fi); - - dlerror(); - __sys_release = (int (*)(const char *path, struct fuse_file_info *))dlsym(dlopen_handle, "sys_release"); - error = dlerror(); - if 
(error) - return log_error(-1, "%s - Failed to find sys_release()", error); - - return __sys_release(path, fi); -} - -static int do_cg_opendir(const char *path, struct fuse_file_info *fi) -{ - char *error; - int (*__cg_opendir)(const char *path, struct fuse_file_info *fi); - - dlerror(); - __cg_opendir = (int (*)(const char *path, struct fuse_file_info *fi))dlsym(dlopen_handle, "cg_opendir"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_opendir()", error); - - return __cg_opendir(path, fi); -} - -static int do_cg_releasedir(const char *path, struct fuse_file_info *fi) -{ - char *error; - int (*__cg_releasedir)(const char *path, struct fuse_file_info *fi); - - dlerror(); - __cg_releasedir = (int (*)(const char *path, struct fuse_file_info *))dlsym(dlopen_handle, "cg_releasedir"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find cg_releasedir()", error); - - return __cg_releasedir(path, fi); -} - -static int do_sys_releasedir(const char *path, struct fuse_file_info *fi) -{ - char *error; - int (*__sys_releasedir)(const char *path, struct fuse_file_info *fi); - - dlerror(); - __sys_releasedir = (int (*)(const char *path, struct fuse_file_info *))dlsym(dlopen_handle, "sys_releasedir"); - error = dlerror(); - if (error) - return log_error(-1, "%s - Failed to find sys_releasedir()", error); - - return __sys_releasedir(path, fi); -} +#define DEF_LIB_FS_OP(type, fsop) \ +static int do_##type##_##fsop(LIB_FS_##fsop##_OP_ARGS_TYPE) \ +{ \ + char *error; \ + int (*__##type##_##fsop)(LIB_FS_##fsop##_OP_ARGS_TYPE); \ + \ + dlerror(); \ + __##type##_##fsop = (int (*)(LIB_FS_##fsop##_OP_ARGS_TYPE))dlsym(dlopen_handle, #type"_"#fsop); \ + error = dlerror(); \ + if (error) \ + return log_error(-1, "%s - Failed to find "#type"_"#fsop"()", error); \ + \ + return __##type##_##fsop(LIB_FS_##fsop##_OP_ARGS); \ +} + +#define LIB_FS_getattr_OP_ARGS_TYPE const char *path, struct stat *sb +#define LIB_FS_getattr_OP_ARGS path, 
sb +DEF_LIB_FS_OP(cg , getattr) +DEF_LIB_FS_OP(proc , getattr) +DEF_LIB_FS_OP(sys , getattr) + +#define LIB_FS_read_OP_ARGS_TYPE const char *path, char *buf, size_t size, \ + off_t offset, struct fuse_file_info *fi +#define LIB_FS_read_OP_ARGS path, buf, size, offset, fi +DEF_LIB_FS_OP(cg , read) +DEF_LIB_FS_OP(proc , read) +DEF_LIB_FS_OP(sys , read) + +#define LIB_FS_write_OP_ARGS_TYPE const char *path, const char *buf, size_t size, \ + off_t offset, struct fuse_file_info *fi +#define LIB_FS_write_OP_ARGS path, buf, size, offset, fi +DEF_LIB_FS_OP(cg , write) +DEF_LIB_FS_OP(sys , write) + +#define LIB_FS_mkdir_OP_ARGS_TYPE const char *path, mode_t mode +#define LIB_FS_mkdir_OP_ARGS path, mode +DEF_LIB_FS_OP(cg, mkdir) + +#define LIB_FS_chown_OP_ARGS_TYPE const char *path, uid_t uid, gid_t gid +#define LIB_FS_chown_OP_ARGS path, uid, gid +DEF_LIB_FS_OP(cg, chown) + +#define LIB_FS_rmdir_OP_ARGS_TYPE const char *path +#define LIB_FS_rmdir_OP_ARGS path +DEF_LIB_FS_OP(cg, rmdir) + +#define LIB_FS_chmod_OP_ARGS_TYPE const char *path, mode_t mode +#define LIB_FS_chmod_OP_ARGS path, mode +DEF_LIB_FS_OP(cg, chmod) + +#define LIB_FS_readdir_OP_ARGS_TYPE const char *path, void *buf, fuse_fill_dir_t filler, \ + off_t offset, struct fuse_file_info *fi +#define LIB_FS_readdir_OP_ARGS path, buf, filler, offset, fi +DEF_LIB_FS_OP(cg , readdir) +DEF_LIB_FS_OP(proc , readdir) +DEF_LIB_FS_OP(sys , readdir) + +#define LIB_FS_readlink_OP_ARGS_TYPE const char *path, char *buf, size_t size +#define LIB_FS_readlink_OP_ARGS path, buf, size +DEF_LIB_FS_OP(sys , readlink) + +#define LIB_FS_open_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi +#define LIB_FS_open_OP_ARGS path, fi +DEF_LIB_FS_OP(cg , open) +DEF_LIB_FS_OP(proc , open) +DEF_LIB_FS_OP(sys , open) + +#define LIB_FS_access_OP_ARGS_TYPE const char *path, int mode +#define LIB_FS_access_OP_ARGS path, mode +DEF_LIB_FS_OP(cg , access) +DEF_LIB_FS_OP(proc , access) +DEF_LIB_FS_OP(sys , access) + +#define 
LIB_FS_opendir_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi +#define LIB_FS_opendir_OP_ARGS path, fi +DEF_LIB_FS_OP(cg , opendir) +DEF_LIB_FS_OP(sys , opendir) + +#define LIB_FS_release_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi +#define LIB_FS_release_OP_ARGS path, fi +DEF_LIB_FS_OP(cg , release) +DEF_LIB_FS_OP(proc , release) +DEF_LIB_FS_OP(sys , release) + +#define LIB_FS_releasedir_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi +#define LIB_FS_releasedir_OP_ARGS path, fi +DEF_LIB_FS_OP(cg , releasedir) +DEF_LIB_FS_OP(sys , releasedir) static bool cgroup_is_enabled = false; From cb96e4169c0f514f68e3469a9c92c766698f0085 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Wed, 24 Apr 2024 20:20:41 +0200 Subject: [PATCH 02/17] src/bindings: hold pidns_hash_table references in the main object Let's allocate pidns_hash_table memory dynamically and hold pointers to it from a new lxcfs_data structure. Previously, pidns_hash_table was a statically allocated in liblxcfs, which means that it won't survive across liblxcfs reloads. Let's introduce a versionized lxcfs_data structure to keep persistent data that should survive reloads. Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 114 ++++++++++++++++++++++++++++++------------------- src/bindings.h | 37 ++++++++++++++++ src/lxcfs.c | 53 ++++++++++++++++++++--- 3 files changed, 155 insertions(+), 49 deletions(-) diff --git a/src/bindings.c b/src/bindings.c index 7f840af2..ec2082d3 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -107,35 +107,8 @@ static int pivot_root(const char *new_root, const char *put_old) extern int pivot_root(const char *new_root, const char *put_old); #endif -/* - * A table caching which pid is init for a pid namespace. - * When looking up which pid is init for $qpid, we first - * 1. Stat /proc/$qpid/ns/pid. - * 2. Check whether the ino_t is in our store. - * a. if not, fork a child in qpid's ns to send us - * ucred.pid = 1, and read the initpid. 
Cache - * initpid and creation time for /proc/initpid - * in a new store entry. - * b. if so, verify that /proc/initpid still matches - * what we have saved. If not, clear the store - * entry and go back to a. If so, return the - * cached initpid. - */ -struct pidns_init_store { - ino_t ino; /* inode number for /proc/$pid/ns/pid */ - pid_t initpid; /* the pid of nit in that ns */ - int init_pidfd; - int64_t ctime; /* the time at which /proc/$initpid was created */ - struct pidns_init_store *next; - int64_t lastcheck; -}; - -/* lol - look at how they are allocated in the kernel */ -#define PIDNS_HASH_SIZE 4096 -#define HASH(x) ((x) % PIDNS_HASH_SIZE) - -static struct pidns_init_store *pidns_hash_table[PIDNS_HASH_SIZE]; -static pthread_mutex_t pidns_store_mutex = PTHREAD_MUTEX_INITIALIZER; +static struct pidns_store **pidns_hash_table; +static pthread_mutex_t *pidns_store_mutex; static void mutex_lock(pthread_mutex_t *l) { @@ -159,12 +132,12 @@ static void mutex_unlock(pthread_mutex_t *l) static inline void store_lock(void) { - mutex_lock(&pidns_store_mutex); + mutex_lock(pidns_store_mutex); } static inline void store_unlock(void) { - mutex_unlock(&pidns_store_mutex); + mutex_unlock(pidns_store_mutex); } #define define_interruptible_lock(type, lockname, lockfn) \ @@ -195,7 +168,7 @@ define_interruptible_lock(pthread_rwlock_t, rwlock_wrlock, pthread_rwlock_timedw #define LXCFS_PROC_PID_LEN \ (STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + +1) -static int initpid_still_valid_pidfd(struct pidns_init_store *entry) +static int initpid_still_valid_pidfd(struct pidns_store *entry) { int ret; @@ -213,7 +186,7 @@ static int initpid_still_valid_pidfd(struct pidns_init_store *entry) return 1; } -static int initpid_still_valid_stat(struct pidns_init_store *entry) +static int initpid_still_valid_stat(struct pidns_store *entry) { struct stat st; char path[LXCFS_PROC_PID_LEN]; @@ -226,7 +199,7 @@ static int initpid_still_valid_stat(struct pidns_init_store *entry) } /* Must 
be called under store_lock */ -static bool initpid_still_valid(struct pidns_init_store *entry) +static bool initpid_still_valid(struct pidns_store *entry) { int ret; @@ -238,11 +211,14 @@ static bool initpid_still_valid(struct pidns_init_store *entry) } /* Must be called under store_lock */ -static void remove_initpid(struct pidns_init_store *entry) +static void remove_initpid(struct pidns_store *entry) { - struct pidns_init_store *it; + struct pidns_store *it; int ino_hash; + if (!pidns_hash_table) + return; + lxcfs_debug("Removing cached entry for pid %d from init pid cache", entry->initpid); @@ -273,6 +249,9 @@ static void prune_initpid_store(void) static int64_t last_prune = 0; int64_t now, threshold; + if (!pidns_hash_table) + return; + if (!last_prune) { last_prune = time(NULL); return; @@ -288,9 +267,9 @@ static void prune_initpid_store(void) threshold = now - 2 * PURGE_SECS; for (int i = 0; i < PIDNS_HASH_SIZE; i++) { - for (struct pidns_init_store *entry = pidns_hash_table[i], *prev = NULL; entry;) { + for (struct pidns_store *entry = pidns_hash_table[i], *prev = NULL; entry;) { if (entry->lastcheck < threshold) { - struct pidns_init_store *cur = entry; + struct pidns_store *cur = entry; lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid); @@ -311,10 +290,13 @@ static void prune_initpid_store(void) static void clear_initpid_store(void) { + if (!pidns_hash_table) + return; + store_lock(); for (int i = 0; i < PIDNS_HASH_SIZE; i++) { - for (struct pidns_init_store *entry = pidns_hash_table[i]; entry;) { - struct pidns_init_store *cur = entry; + for (struct pidns_store *entry = pidns_hash_table[i]; entry;) { + struct pidns_store *cur = entry; lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid); @@ -330,13 +312,16 @@ static void clear_initpid_store(void) /* Must be called under store_lock */ static void save_initpid(ino_t pidns_inode, pid_t pid) { - __do_free struct pidns_init_store *entry = NULL; + __do_free 
struct pidns_store *entry = NULL; __do_close int pidfd = -EBADF; const struct lxcfs_opts *opts = fuse_get_context()->private_data; char path[LXCFS_PROC_PID_LEN]; struct stat st; int ino_hash; + if (!pidns_hash_table) + return; + if (opts && opts->use_pidfd && can_use_pidfd) { pidfd = pidfd_open(pid, 0); if (pidfd < 0) @@ -352,7 +337,7 @@ static void save_initpid(ino_t pidns_inode, pid_t pid) return; ino_hash = HASH(pidns_inode); - *entry = (struct pidns_init_store){ + *entry = (struct pidns_store){ .ino = pidns_inode, .initpid = pid, .ctime = st.st_ctime, @@ -374,7 +359,12 @@ static void save_initpid(ino_t pidns_inode, pid_t pid) */ static pid_t lookup_verify_initpid(ino_t pidns_inode) { - struct pidns_init_store *entry = pidns_hash_table[HASH(pidns_inode)]; + struct pidns_store *entry; + + if (!pidns_hash_table) + return -1; /* no table: report a cache miss (pid_t result, callers test < 0) */ + + entry = pidns_hash_table[HASH(pidns_inode)]; while (entry) { if (entry->ino == pidns_inode) { @@ -1008,11 +998,23 @@ void lxcfslib_init(void) lxcfs_info("Failed to run constructor %s to reload liblxcfs", __func__); } +static bool old_daemon = false; + static void __attribute__((destructor)) lxcfs_exit(void) { lxcfs_info("Running destructor %s", __func__); clear_initpid_store(); + + if (old_daemon) { + if (pidns_store_mutex) { + pthread_mutex_destroy(pidns_store_mutex); + free(pidns_store_mutex); + } + + free(pidns_hash_table); + } + free_cpuview(); cgroup_exit(cgroup_ops); } @@ -1021,6 +1023,7 @@ void *lxcfs_fuse_init(struct fuse_conn_info *conn, void *data) { struct fuse_context *fc = fuse_get_context(); struct lxcfs_opts *opts = fc ? 
fc->private_data : NULL; + struct lxcfs_persistent_data *lxcfs_data = data; #if HAVE_FUSE_RETURNS_DT_TYPE can_use_sys_cpu = true; @@ -1035,5 +1038,30 @@ void *lxcfs_fuse_init(struct fuse_conn_info *conn, void *data) /* initialize the library */ lxcfslib_init(); + if (lxcfs_data) { + pidns_hash_table = lxcfs_data->pidns_hash_table; + pidns_store_mutex = &lxcfs_data->pidns_store_mutex; + } else { + lxcfs_info("Fallback way to initialize liblxcfs with old daemon binary. Please, consider full restart."); + + old_daemon = true; + + pidns_hash_table = zalloc(PIDNS_HASH_SIZE * sizeof(struct pidns_store *)); + if (!pidns_hash_table) + goto err; + + pidns_store_mutex = malloc(sizeof(*pidns_store_mutex)); /* a single mutex guards the whole table */ + if (!pidns_store_mutex) + goto err; + + if (pthread_mutex_init(pidns_store_mutex, NULL)) + goto err; + } + return opts; + +err: + lxcfs_error("liblxcfs failed to initialize. Turning off LXCFS virtualization.\n"); + reload_successful = 0; + return NULL; } diff --git a/src/bindings.h b/src/bindings.h index 346c261d..692967d0 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -104,6 +105,42 @@ struct file_info { int cached; }; +/* + * A table caching which pid is init for a pid namespace. + * When looking up which pid is init for $qpid, we first + * 1. Stat /proc/$qpid/ns/pid. + * 2. Check whether the ino_t is in our store. + * a. if not, fork a child in qpid's ns to send us + * ucred.pid = 1, and read the initpid. Cache + * initpid and creation time for /proc/initpid + * in a new store entry. + * b. if so, verify that /proc/initpid still matches + * what we have saved. If not, clear the store + * entry and go back to a. If so, return the + * cached initpid. 
+ */ +struct pidns_store { + ino_t ino; /* inode number for /proc/$pid/ns/pid */ + pid_t initpid; /* the pid of nit in that ns */ + int init_pidfd; + int64_t ctime; /* the time at which /proc/$initpid was created */ + struct pidns_store *next; + int64_t lastcheck; +}; + +/* lol - look at how they are allocated in the kernel */ +#define PIDNS_HASH_SIZE 4096 +#define HASH(x) ((x) % PIDNS_HASH_SIZE) + +/* structure that contains data that should survive reload */ +struct lxcfs_persistent_data { + /* increase version if the structure was changed */ + __u16 version; + + struct pidns_store **pidns_hash_table; + pthread_mutex_t pidns_store_mutex; +}; + struct lxcfs_opts { bool swap_off; bool use_pidfd; diff --git a/src/lxcfs.c b/src/lxcfs.c index 54417df9..8eb44d6e 100644 --- a/src/lxcfs.c +++ b/src/lxcfs.c @@ -70,6 +70,44 @@ static inline void users_unlock(void) unlock_mutex(&user_count_mutex); } +static struct lxcfs_persistent_data *lxcfs_data; + +struct lxcfs_persistent_data *alloc_lxcfs_data(void) +{ + struct lxcfs_persistent_data *data; + + data = zalloc(sizeof(struct lxcfs_persistent_data)); + if (!data) + return NULL; + + data->version = 1; + + data->pidns_hash_table = zalloc(PIDNS_HASH_SIZE * sizeof(struct pidns_store *)); + if (!data->pidns_hash_table) + goto err; + + if (pthread_mutex_init(&data->pidns_store_mutex, NULL)) + goto err; + + return data; + +err: + free(data->pidns_hash_table); + free(data); + return NULL; +} + +void free_lxcfs_data(struct lxcfs_persistent_data *data) +{ + /* Accept NULL, like free(), so cleanup paths can call this after a failed alloc_lxcfs_data() */ + if (!data) + return; + + pthread_mutex_destroy(&data->pidns_store_mutex); + free(data->pidns_hash_table); + free(data); +} + /* Returns file info type of custom type declaration carried * in fuse_file_info */ static inline enum lxcfs_virt_t file_info_type(struct fuse_file_info *fi) @@ -150,18 +184,18 @@ static int stop_loadavg(void) static volatile sig_atomic_t need_reload; -static int do_lxcfs_fuse_init(void) +static int do_lxcfs_fuse_init(struct fuse_conn_info *conn, void *data) { char *error; - void 
*(*__lxcfs_fuse_init)(struct fuse_conn_info * conn, void * cfg); + void *(*__lxcfs_fuse_init)(struct fuse_conn_info *, void *); dlerror(); - __lxcfs_fuse_init = (void *(*)(struct fuse_conn_info * conn, void * cfg))dlsym(dlopen_handle, "lxcfs_fuse_init"); + __lxcfs_fuse_init = (void *(*)(struct fuse_conn_info *, void *))dlsym(dlopen_handle, "lxcfs_fuse_init"); error = dlerror(); if (error) return log_error(-1, "%s - Failed to find lxcfs_fuse_init()", error); - __lxcfs_fuse_init(NULL, NULL); + __lxcfs_fuse_init(conn, data); return 0; } @@ -208,7 +242,7 @@ static void do_reload(bool reinit) lxcfs_debug("Opened %s", lxcfs_lib_path); good: - if (reinit && do_lxcfs_fuse_init() < 0) { + if (reinit && do_lxcfs_fuse_init(NULL, lxcfs_data) < 0) { log_exit("Failed to initialize liblxcfs.so"); } @@ -802,7 +836,7 @@ static void *lxcfs_init(struct fuse_conn_info *conn, struct fuse_config *cfg) static void *lxcfs_init(struct fuse_conn_info *conn) #endif { - if (do_lxcfs_fuse_init() < 0) + if (do_lxcfs_fuse_init(conn, lxcfs_data) < 0) return NULL; #if HAVE_FUSE3 @@ -991,6 +1025,12 @@ int main(int argc, char *argv[]) struct lxcfs_opts *opts; char *runtime_path_arg = NULL; + lxcfs_data = alloc_lxcfs_data(); + if (lxcfs_data == NULL) { + lxcfs_error("Error allocating memory for lxcfs persistent data"); + goto out; + } + opts = malloc(sizeof(struct lxcfs_opts)); if (opts == NULL) { lxcfs_error("Error allocating memory for options"); @@ -1200,6 +1240,7 @@ int main(int argc, char *argv[]) unlink(pidfile); free(new_fuse_opts); free(opts); + free_lxcfs_data(lxcfs_data); close_prot_errno_disarm(pidfile_fd); exit(ret); } From d8ba40e806e89916ee5786e9ac8203daa5cee18b Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Wed, 24 Apr 2024 20:23:15 +0200 Subject: [PATCH 03/17] src/bindings: make struct pidns_store versionized It's necessary as we want to be able to easily extend it and use live reloads update mechanism. 
This change does not break compatibility, because the lifetime of struct pidns_store is limited to the liblxcfs lifetime. But we are going to extend the pidns_store lifetime, which is why we need to start versioning it. Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 1 + src/bindings.h | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/bindings.c b/src/bindings.c index ec2082d3..ea417f6e 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -338,6 +338,7 @@ static void save_initpid(ino_t pidns_inode, pid_t pid) ino_hash = HASH(pidns_inode); *entry = (struct pidns_store){ + .version = 0, .ino = pidns_inode, .initpid = pid, .ctime = st.st_ctime, diff --git a/src/bindings.h b/src/bindings.h index 692967d0..75b8490c 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -120,11 +120,18 @@ struct file_info { * cached initpid. */ struct pidns_store { + /* increase version if the structure was changed */ + __u16 version; + + /* hash table key */ ino_t ino; /* inode number for /proc/$pid/ns/pid */ - pid_t initpid; /* the pid of nit in that ns */ + + /* next entry in hash table's bucket */ + struct pidns_store *next; + + pid_t initpid; /* the pid of init in that ns */ int init_pidfd; int64_t ctime; /* the time at which /proc/$initpid was created */ - struct pidns_store *next; int64_t lastcheck; }; From b67e1efbe7ab74fc19b2f21570d2189d15d39904 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 25 Apr 2024 13:26:10 +0200 Subject: [PATCH 04/17] src/bindings: add keep_on_reload field to struct pidns_store Let's add a keep_on_reload field to struct pidns_store. The idea behind it is that if this flag is set to true, then the pidns_store entry won't be considered a cache item which can be dropped. Instead, it will be kept across liblxcfs reloads and dropped only when the pid namespace it refers to dies. 
Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 40 ++++++++++++++++++++++++++++++---------- src/bindings.h | 3 +++ 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/src/bindings.c b/src/bindings.c index ea417f6e..6f2ef841 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -242,6 +242,12 @@ static void remove_initpid(struct pidns_store *entry) } } +static bool keep_pidns_entry(struct pidns_store *entry) +{ + return (entry->version >= 1) && entry->keep_on_reload && + initpid_still_valid(entry); +} + #define PURGE_SECS 5 /* Must be called under store_lock */ static void prune_initpid_store(void) @@ -268,10 +274,11 @@ static void prune_initpid_store(void) for (int i = 0; i < PIDNS_HASH_SIZE; i++) { for (struct pidns_store *entry = pidns_hash_table[i], *prev = NULL; entry;) { - if (entry->lastcheck < threshold) { - struct pidns_store *cur = entry; + struct pidns_store *cur = entry; - lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid); + if ((entry->lastcheck < threshold) && + !keep_pidns_entry(cur)) { + lxcfs_debug("Removed cache entry for pid %d from init pid cache", cur->initpid); if (prev) prev->next = entry->next; @@ -281,6 +288,8 @@ static void prune_initpid_store(void) close_prot_errno_disarm(cur->init_pidfd); free_disarm(cur); } else { + lxcfs_debug("Kept cache entry for pid %d in init pid cache", cur->initpid); + prev = entry; entry = entry->next; } @@ -295,15 +304,25 @@ static void clear_initpid_store(void) store_lock(); for (int i = 0; i < PIDNS_HASH_SIZE; i++) { - for (struct pidns_store *entry = pidns_hash_table[i]; entry;) { + for (struct pidns_store *entry = pidns_hash_table[i], *prev = NULL; entry;) { struct pidns_store *cur = entry; - lxcfs_debug("Removed cache entry for pid %d to init pid cache", cur->initpid); + if (keep_pidns_entry(cur)) { + lxcfs_debug("Kept cache entry for pid %d in init pid cache", cur->initpid); + + prev = entry; + entry = entry->next; + } else { + lxcfs_debug("Removed cache 
entry for pid %d from init pid cache", cur->initpid); - pidns_hash_table[i] = entry->next; - entry = entry->next; - close_prot_errno_disarm(cur->init_pidfd); - free_disarm(cur); + if (prev) + prev->next = entry->next; + else + pidns_hash_table[i] = entry->next; + entry = entry->next; + close_prot_errno_disarm(cur->init_pidfd); + free_disarm(cur); + } } } store_unlock(); @@ -338,13 +357,14 @@ static void save_initpid(ino_t pidns_inode, pid_t pid) ino_hash = HASH(pidns_inode); *entry = (struct pidns_store){ - .version = 0, + .version = 1, .ino = pidns_inode, .initpid = pid, .ctime = st.st_ctime, .next = pidns_hash_table[ino_hash], .lastcheck = time(NULL), .init_pidfd = move_fd(pidfd), + .keep_on_reload = false, }; pidns_hash_table[ino_hash] = move_ptr(entry); diff --git a/src/bindings.h b/src/bindings.h index 75b8490c..fb2a6814 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -133,6 +133,9 @@ struct pidns_store { int init_pidfd; int64_t ctime; /* the time at which /proc/$initpid was created */ int64_t lastcheck; + + /* Do not free on liblxcfs reload (contains useful persistent data) */ + bool keep_on_reload; }; /* lol - look at how they are allocated in the kernel */ From 04229073068e8cfb7efb3b66249a6f547277b745 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 25 Apr 2024 13:36:02 +0200 Subject: [PATCH 05/17] src/bindings: introduce free_initpid() helper Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/bindings.c b/src/bindings.c index 6f2ef841..f49ef74b 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -210,6 +210,12 @@ static bool initpid_still_valid(struct pidns_store *entry) return ret == 1; } +static void free_initpid(struct pidns_store *entry) +{ + close_prot_errno_disarm(entry->init_pidfd); + free_disarm(entry); +} + /* Must be called under store_lock */ static void remove_initpid(struct pidns_store *entry) { @@ -225,8 +231,7 @@ static void 
remove_initpid(struct pidns_store *entry) ino_hash = HASH(entry->ino); if (pidns_hash_table[ino_hash] == entry) { pidns_hash_table[ino_hash] = entry->next; - close_prot_errno_disarm(entry->init_pidfd); - free_disarm(entry); + free_initpid(entry); return; } @@ -234,8 +239,7 @@ static void remove_initpid(struct pidns_store *entry) while (it) { if (it->next == entry) { it->next = entry->next; - close_prot_errno_disarm(entry->init_pidfd); - free_disarm(entry); + free_initpid(entry); return; } it = it->next; @@ -285,8 +289,7 @@ static void prune_initpid_store(void) else pidns_hash_table[i] = entry->next; entry = entry->next; - close_prot_errno_disarm(cur->init_pidfd); - free_disarm(cur); + free_initpid(cur); } else { lxcfs_debug("Kept cache entry for pid %d in init pid cache", cur->initpid); @@ -320,8 +323,7 @@ static void clear_initpid_store(void) else pidns_hash_table[i] = entry->next; entry = entry->next; - close_prot_errno_disarm(cur->init_pidfd); - free_disarm(cur); + free_initpid(cur); } } } From e3dbd5761d8242c36230645d6dc219cefe687f4b Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 25 Apr 2024 14:17:10 +0200 Subject: [PATCH 06/17] src/bindings: prevent duplicate entries in the pid hash table from appearing Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/bindings.c b/src/bindings.c index f49ef74b..f07a6ef2 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -584,6 +584,8 @@ pid_t lookup_initpid_in_store(pid_t pid) hashed_pid = lookup_verify_initpid(st.st_ino); if (hashed_pid < 0) { + pid_t already_hashed_pid; + /* release the mutex as the following call is expensive */ store_unlock(); @@ -591,8 +593,20 @@ pid_t lookup_initpid_in_store(pid_t pid) store_lock(); - if (hashed_pid > 0) - save_initpid(st.st_ino, hashed_pid); + /* recheck that entry wasn't added while lock was released */ + already_hashed_pid = lookup_verify_initpid(st.st_ino); + + /* no 
existing entry found. Just add a new one. */ + if (already_hashed_pid < 0) { + if (hashed_pid > 0) + save_initpid(st.st_ino, hashed_pid); + + /* entry found it must have the same pid */ + } else if (already_hashed_pid != hashed_pid) { + lxcfs_error("Different init pids (%d, %d) for the same cache entry %lu\n", + already_hashed_pid, hashed_pid, HASH(st.st_ino)); + hashed_pid = -1; + } } /* From a25e40c9397e22a1b1c0545147922f24fcd64355 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 25 Apr 2024 15:18:59 +0200 Subject: [PATCH 07/17] src/bindings: introduce a lookup_verify_pidns_entry helper Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/bindings.c b/src/bindings.c index f07a6ef2..b6f277e4 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -373,14 +373,7 @@ static void save_initpid(ino_t pidns_inode, pid_t pid) lxcfs_debug("Added cache entry %d for pid %d to init pid cache", ino_hash, pid); } -/* - * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store - * entry for the inode number and creation time. Verify that the init pid - * is still valid. If not, remove it. Return the entry if valid, NULL - * otherwise. - * Must be called under store_lock - */ -static pid_t lookup_verify_initpid(ino_t pidns_inode) +static struct pidns_store *lookup_verify_pidns_entry(ino_t pidns_inode) { struct pidns_store *entry; @@ -393,16 +386,33 @@ static pid_t lookup_verify_initpid(ino_t pidns_inode) if (entry->ino == pidns_inode) { if (initpid_still_valid(entry)) { entry->lastcheck = time(NULL); - return entry->initpid; + return entry; } remove_initpid(entry); - return ret_errno(ESRCH); + return NULL; } entry = entry->next; } - return ret_errno(ESRCH); + return NULL; +} + +/* + * Given the stat(2) info for a nsfd pid inode, lookup the init_pid_store + * entry for the inode number and creation time. Verify that the init pid + * is still valid. 
If not, remove it. Return the entry if valid, NULL + * otherwise. + * Must be called under store_lock + */ +static pid_t lookup_verify_initpid(ino_t pidns_inode) +{ + struct pidns_store *entry = lookup_verify_pidns_entry(pidns_inode); + + if (!entry) + return ret_errno(ESRCH); + + return entry->initpid; } static bool send_creds_ok(int sock_fd) From 336fde1c2e89a0f6042828613f3e2fa1c3cd1add Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 25 Apr 2024 15:19:39 +0200 Subject: [PATCH 08/17] src/bindings: introduce a get_pidns_ino helper Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/bindings.c b/src/bindings.c index b6f277e4..e25faee5 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -580,19 +580,30 @@ static pid_t scm_init_pid(pid_t task) return pid_ret; } -pid_t lookup_initpid_in_store(pid_t pid) +static ino_t get_pidns_ino(pid_t pid) { - pid_t hashed_pid = 0; char path[LXCFS_PROC_PID_NS_LEN]; struct stat st; snprintf(path, sizeof(path), "/proc/%d/ns/pid", pid); if (stat(path, &st)) + return 0; + + return st.st_ino; +} + +pid_t lookup_initpid_in_store(pid_t pid) +{ + pid_t hashed_pid = 0; + ino_t pidns_ino; + + pidns_ino = get_pidns_ino(pid); + if (!pidns_ino) return ret_errno(ESRCH); store_lock(); - hashed_pid = lookup_verify_initpid(st.st_ino); + hashed_pid = lookup_verify_initpid(pidns_ino); if (hashed_pid < 0) { pid_t already_hashed_pid; @@ -604,17 +615,17 @@ pid_t lookup_initpid_in_store(pid_t pid) store_lock(); /* recheck that entry wasn't added while lock was released */ - already_hashed_pid = lookup_verify_initpid(st.st_ino); + already_hashed_pid = lookup_verify_initpid(pidns_ino); /* no existing entry found. Just add a new one. 
*/ if (already_hashed_pid < 0) { if (hashed_pid > 0) - save_initpid(st.st_ino, hashed_pid); + save_initpid(pidns_ino, hashed_pid); /* entry found it must have the same pid */ } else if (already_hashed_pid != hashed_pid) { lxcfs_error("Different init pids (%d, %d) for the same cache entry %lu\n", - already_hashed_pid, hashed_pid, HASH(st.st_ino)); + already_hashed_pid, hashed_pid, HASH(pidns_ino)); hashed_pid = -1; } } From 13ec8059339ff15f6b80374cf89f8cf670c03142 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Sun, 28 Apr 2024 15:29:08 +0200 Subject: [PATCH 09/17] src/bindings: add iter_initpid_store helper Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 21 +++++++++++++++++++++ src/bindings.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/src/bindings.c b/src/bindings.c index e25faee5..2e28f684 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -330,6 +330,27 @@ static void clear_initpid_store(void) store_unlock(); } +int iter_initpid_store(pidns_store_iter_func_t f, void *data) +{ + int ret; + + if (!pidns_hash_table) + return 0; + + store_lock(); + for (int i = 0; i < PIDNS_HASH_SIZE; i++) { + for (struct pidns_store *entry = pidns_hash_table[i]; entry; entry = entry->next) { + ret = f(entry, data); + if (ret) + goto out; + } + } + +out: + store_unlock(); + return ret; +} + /* Must be called under store_lock */ static void save_initpid(ino_t pidns_inode, pid_t pid) { diff --git a/src/bindings.h b/src/bindings.h index fb2a6814..9b2242a7 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -171,7 +171,9 @@ typedef enum lxcfs_opt_t { LXCFS_OPTS_MAX = LXCFS_CFS_ON, } lxcfs_opt_t; +typedef int (*pidns_store_iter_func_t) (struct pidns_store *cur, void *data); +extern int iter_initpid_store(pidns_store_iter_func_t f, void *data); extern pid_t lookup_initpid_in_store(pid_t qpid); extern void prune_init_slice(char *cg); extern bool supports_pidfd(void); From fc878a62cfe4ac1f2e2fcebb68ea157e7e50ee08 Mon Sep 17 00:00:00 2001 From: Alexander 
Mikhalitsyn Date: Thu, 25 Apr 2024 15:20:25 +0200 Subject: [PATCH 10/17] src/bindings: add features bitmask to struct pidns_store This bitmask can be used to represent a per-instance (technically, per pid namespace) features configuration (toggle-like). Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++- src/bindings.h | 10 ++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/src/bindings.c b/src/bindings.c index 2e28f684..d85d424d 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -380,7 +380,7 @@ static void save_initpid(ino_t pidns_inode, pid_t pid) ino_hash = HASH(pidns_inode); *entry = (struct pidns_store){ - .version = 1, + .version = 2, .ino = pidns_inode, .initpid = pid, .ctime = st.st_ctime, @@ -661,6 +661,55 @@ pid_t lookup_initpid_in_store(pid_t pid) return hashed_pid; } +bool check_set_lxcfs_feature(pid_t pid, enum lxcfs_feature_op op, __u64 feature) +{ + bool ret = false; + struct pidns_store *entry; + ino_t pidns_ino; + + pidns_ino = get_pidns_ino(pid); + if (!pidns_ino) + return ret; + + store_lock(); + + entry = lookup_verify_pidns_entry(pidns_ino); + if (!entry) + goto out; + + if (entry->version < 2) + goto out; + + switch (op) { + case LXCFS_FEATURE_CHECK: + ret = entry->features & feature; + + break; + case LXCFS_FEATURE_SET: + entry->features |= feature; + + /* + * As we have enabled feature, this entry + * must be kept across lxcfs live reloads. + */ + entry->keep_on_reload = true; + + ret = true; + + break; + case LXCFS_FEATURE_CLEAR: + entry->features &= ~feature; + ret = true; + + break; + } + +out: + store_unlock(); + + return ret; +} + /* * Functions needed to setup cgroups in the __constructor__. 
*/ diff --git a/src/bindings.h b/src/bindings.h index 9b2242a7..9ca79a0e 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -105,6 +105,12 @@ struct file_info { int cached; }; +enum lxcfs_feature_op { + LXCFS_FEATURE_CHECK, + LXCFS_FEATURE_SET, + LXCFS_FEATURE_CLEAR, +}; + /* * A table caching which pid is init for a pid namespace. * When looking up which pid is init for $qpid, we first @@ -136,6 +142,9 @@ struct pidns_store { /* Do not free on liblxcfs reload (contains useful persistent data) */ bool keep_on_reload; + + /* bit mask for per-instance configuration options (on/off) */ + __u64 features; }; /* lol - look at how they are allocated in the kernel */ @@ -175,6 +184,7 @@ typedef int (*pidns_store_iter_func_t) (struct pidns_store *cur, void *data); extern int iter_initpid_store(pidns_store_iter_func_t f, void *data); extern pid_t lookup_initpid_in_store(pid_t qpid); +extern bool check_set_lxcfs_feature(pid_t pid, enum lxcfs_feature_op op, __u64 feature); extern void prune_init_slice(char *cg); extern bool supports_pidfd(void); extern bool liblxcfs_functional(void); From edff17074ae6643c8a60b778b34b9a513144ef39 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Mon, 29 Apr 2024 10:27:57 +0200 Subject: [PATCH 11/17] lxcfs: add libcap-(dev|devel) build-time dependency Signed-off-by: Alexander Mikhalitsyn --- .github/workflows/builds.yml | 2 +- .github/workflows/tests.yml | 6 +++--- lxcfs.spec.in | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml index 24ec8fca..58d6f44b 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -27,7 +27,7 @@ jobs: run: | sudo apt-get update -qq sudo apt-get install -qq gcc clang - sudo apt-get install -qq libfuse-dev uuid-runtime + sudo apt-get install -qq libcap-dev libfuse-dev uuid-runtime sudo apt-get install -qq python3 python3-setuptools sudo pip3 install meson ninja diff --git a/.github/workflows/tests.yml 
b/.github/workflows/tests.yml index 417ae80c..fc30ce4d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -27,7 +27,7 @@ jobs: run: | sudo apt-get update -qq sudo apt-get install -qq gcc clang - sudo apt-get install -qq libfuse-dev uuid-runtime + sudo apt-get install -qq libcap-dev libfuse-dev uuid-runtime sudo apt-get install -qq python3 python3-setuptools sudo pip3 install meson==0.55.1 ninja @@ -71,7 +71,7 @@ jobs: sudo add-apt-repository universe sudo apt-get update -qq sudo apt-get install -qq gcc clang - sudo apt-get install -qq libfuse3-dev uuid-runtime + sudo apt-get install -qq libcap-dev libfuse3-dev uuid-runtime sudo apt-get install -qq python3 python3-setuptools sudo pip3 install meson==0.55.1 ninja @@ -117,7 +117,7 @@ jobs: sudo add-apt-repository universe sudo apt-get update -qq sudo apt-get install -qq gcc clang - sudo apt-get install -qq libfuse3-dev uuid-runtime + sudo apt-get install -qq libcap-dev libfuse3-dev uuid-runtime sudo apt-get install -qq python3 python3-setuptools sudo pip3 install meson==0.55.1 ninja diff --git a/lxcfs.spec.in b/lxcfs.spec.in index 90bb033b..b224508b 100644 --- a/lxcfs.spec.in +++ b/lxcfs.spec.in @@ -33,6 +33,7 @@ BuildRequires: libtool BuildRequires: docbook2X BuildRequires: doxygen BuildRequires: fuse-devel +BuildRequires: libcap-devel Requires: fuse-libs %description From 0cd7eb4068f3f54b155fef7b208042662726d001 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Sun, 28 Apr 2024 17:46:21 +0200 Subject: [PATCH 12/17] src/utils: add proc_has_capability_in helper Signed-off-by: Alexander Mikhalitsyn --- src/utils.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ src/utils.h | 4 ++++ 2 files changed, 51 insertions(+) diff --git a/src/utils.c b/src/utils.c index deae03fb..1ffc3959 100644 --- a/src/utils.c +++ b/src/utils.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -713,3 +714,49 @@ bool can_access_personality(void) return 
could_access_init_personality != 0; } + +/* inspired by the Linux kernel's selftests/bpf :-) */ +bool proc_has_capability(pid_t pid, __u64 caps) +{ + struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3]; + struct __user_cap_header_struct hdr = { + .version = _LINUX_CAPABILITY_VERSION_3, + }; + __u32 cap0 = caps; + __u32 cap1 = caps >> 32; + int err; + + err = capget(&hdr, data); + if (err) + return false; + + return ((data[0].effective & cap0) == cap0 && + (data[1].effective & cap1) == cap1); +} + +#define LXCFS_PROC_USER_NS_LEN \ + (STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(uint64_t) + \ + STRLITERALLEN("/ns/user") + 1) + +static ino_t get_userns_ino(pid_t pid) +{ + char path[LXCFS_PROC_USER_NS_LEN]; + struct stat st; + + snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); + if (stat(path, &st)) + return 0; + + return st.st_ino; +} + +bool proc_has_capability_in(pid_t nspid, pid_t pid, cap_value_t cap) +{ + ino_t nspid_userns_ino, pid_userns_ino; + + nspid_userns_ino = get_userns_ino(nspid); + pid_userns_ino = get_userns_ino(pid); + + return (nspid_userns_ino == pid_userns_ino) && + proc_has_capability(pid, 1ULL << cap); +} diff --git a/src/utils.h b/src/utils.h index 90701ced..166f6cf7 100644 --- a/src/utils.h +++ b/src/utils.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -82,4 +83,7 @@ extern int get_task_personality(pid_t pid, __u32 *personality); extern bool can_access_personality(void); extern int get_host_personality(__u32 *personality); +extern bool proc_has_capability(pid_t pid, __u64 caps); +extern bool proc_has_capability_in(pid_t nspid, pid_t pid, cap_value_t cap); + #endif /* __LXCFS_UTILS_H */ From 3707a3316dab37a37bf83ceae8af53594a3917df Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Sun, 28 Apr 2024 22:06:36 +0200 Subject: [PATCH 13/17] src/utils: fix in_same_namespace helper Signed-off-by: Alexander Mikhalitsyn --- src/utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/utils.c b/src/utils.c index 1ffc3959..8b13214d 100644 --- a/src/utils.c +++ b/src/utils.c @@ -114,7 +114,7 @@ bool is_shared_pidns(pid_t pid) return false; fd = in_same_namespace(pid, getpid(), "pid"); - if (fd == EINVAL) + if (fd == -EINVAL) return true; return false; From 6ee06596689c9904d798c65fb484f4324fbda1c8 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Fri, 12 Apr 2024 15:37:22 +0200 Subject: [PATCH 14/17] lxcfs: introduce a new "lxcfs" subtree Just like the existing "cgroup", "sys" and "proc" subtrees, let's introduce the "lxcfs" subtree, which will contain LXCFS filesystem-related data and will be used as an interface to interact with and configure LXCFS at runtime. Signed-off-by: Alexander Mikhalitsyn --- src/api_extensions.h | 1 + src/bindings.h | 5 +++ src/lxcfs.c | 90 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+) diff --git a/src/api_extensions.h b/src/api_extensions.h index 77f69e71..0163f4ec 100644 --- a/src/api_extensions.h +++ b/src/api_extensions.h @@ -27,6 +27,7 @@ static char *api_extensions[] = { "cpuview_daemon", "loadavg_daemon", "pidfds", + "per_instance_configuration", }; static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions); diff --git a/src/bindings.h b/src/bindings.h index 9ca79a0e..e8b86b5c 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -67,6 +67,10 @@ enum lxcfs_virt_t { LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE, #define LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH "/sys/devices/system/cpu/online" + + LXC_TYPE_LXCFS, + + LXC_TYPE_MAX, }; @@ -74,6 +78,7 @@ enum lxcfs_virt_t { #define LXCFS_TYPE_CGROUP(type) (type >= LXC_TYPE_CGDIR && type <= LXC_TYPE_CGFILE) #define LXCFS_TYPE_PROC(type) (type >= LXC_TYPE_PROC_MEMINFO && type <= LXC_TYPE_PROC_SLABINFO) #define LXCFS_TYPE_SYS(type) (type >= LXC_TYPE_SYS && type <= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE) +#define LXCFS_TYPE_LXCFS(type) (type >= LXC_TYPE_LXCFS && type < LXC_TYPE_MAX) #define LXCFS_TYPE_OK(type) (type >= 
LXC_TYPE_CGDIR && type < LXC_TYPE_MAX) /* diff --git a/src/lxcfs.c b/src/lxcfs.c index 8eb44d6e..e332b98f 100644 --- a/src/lxcfs.c +++ b/src/lxcfs.c @@ -297,6 +297,7 @@ static int do_##type##_##fsop(LIB_FS_##fsop##_OP_ARGS_TYPE) \ DEF_LIB_FS_OP(cg , getattr) DEF_LIB_FS_OP(proc , getattr) DEF_LIB_FS_OP(sys , getattr) +DEF_LIB_FS_OP(lxcfsctl, getattr) #define LIB_FS_read_OP_ARGS_TYPE const char *path, char *buf, size_t size, \ off_t offset, struct fuse_file_info *fi @@ -304,12 +305,14 @@ DEF_LIB_FS_OP(sys , getattr) DEF_LIB_FS_OP(cg , read) DEF_LIB_FS_OP(proc , read) DEF_LIB_FS_OP(sys , read) +DEF_LIB_FS_OP(lxcfsctl, read) #define LIB_FS_write_OP_ARGS_TYPE const char *path, const char *buf, size_t size, \ off_t offset, struct fuse_file_info *fi #define LIB_FS_write_OP_ARGS path, buf, size, offset, fi DEF_LIB_FS_OP(cg , write) DEF_LIB_FS_OP(sys , write) +DEF_LIB_FS_OP(lxcfsctl, write) #define LIB_FS_mkdir_OP_ARGS_TYPE const char *path, mode_t mode #define LIB_FS_mkdir_OP_ARGS path, mode @@ -333,38 +336,45 @@ DEF_LIB_FS_OP(cg, chmod) DEF_LIB_FS_OP(cg , readdir) DEF_LIB_FS_OP(proc , readdir) DEF_LIB_FS_OP(sys , readdir) +DEF_LIB_FS_OP(lxcfsctl, readdir) #define LIB_FS_readlink_OP_ARGS_TYPE const char *path, char *buf, size_t size #define LIB_FS_readlink_OP_ARGS path, buf, size DEF_LIB_FS_OP(sys , readlink) +DEF_LIB_FS_OP(lxcfsctl, readlink) #define LIB_FS_open_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi #define LIB_FS_open_OP_ARGS path, fi DEF_LIB_FS_OP(cg , open) DEF_LIB_FS_OP(proc , open) DEF_LIB_FS_OP(sys , open) +DEF_LIB_FS_OP(lxcfsctl, open) #define LIB_FS_access_OP_ARGS_TYPE const char *path, int mode #define LIB_FS_access_OP_ARGS path, mode DEF_LIB_FS_OP(cg , access) DEF_LIB_FS_OP(proc , access) DEF_LIB_FS_OP(sys , access) +DEF_LIB_FS_OP(lxcfsctl, access) #define LIB_FS_opendir_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi #define LIB_FS_opendir_OP_ARGS path, fi DEF_LIB_FS_OP(cg , opendir) DEF_LIB_FS_OP(sys , opendir) 
+DEF_LIB_FS_OP(lxcfsctl, opendir) #define LIB_FS_release_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi #define LIB_FS_release_OP_ARGS path, fi DEF_LIB_FS_OP(cg , release) DEF_LIB_FS_OP(proc , release) DEF_LIB_FS_OP(sys , release) +DEF_LIB_FS_OP(lxcfsctl, release) #define LIB_FS_releasedir_OP_ARGS_TYPE const char *path, struct fuse_file_info *fi #define LIB_FS_releasedir_OP_ARGS path, fi DEF_LIB_FS_OP(cg , releasedir) DEF_LIB_FS_OP(sys , releasedir) +DEF_LIB_FS_OP(lxcfsctl, releasedir) static bool cgroup_is_enabled = false; @@ -409,6 +419,13 @@ static int lxcfs_getattr(const char *path, struct stat *sb) return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_getattr(path, sb); + down_users(); + return ret; + } + return -ENOENT; } @@ -436,6 +453,13 @@ static int lxcfs_opendir(const char *path, struct fuse_file_info *fi) return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_opendir(path, fi); + down_users(); + return ret; + } + return -ENOENT; } @@ -455,6 +479,7 @@ static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, if (strcmp(path, "/") == 0) { if (dir_filler(filler, buf, ".", 0) != 0 || dir_filler(filler, buf, "..", 0) != 0 || + dir_filler(filler, buf, "lxcfs", 0) != 0 || dir_filler(filler, buf, "proc", 0) != 0 || dir_filler(filler, buf, "sys", 0) != 0 || (cgroup_is_enabled && dir_filler(filler, buf, "cgroup", 0) != 0)) @@ -484,6 +509,13 @@ static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler, return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_readdir(path, buf, filler, offset, fi); + down_users(); + return ret; + } + return -ENOENT; } @@ -515,6 +547,13 @@ static int lxcfs_access(const char *path, int mode) return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_access(path, mode); + down_users(); + return ret; + } + return -EACCES; } @@ -539,6 +578,13 @@ static int 
lxcfs_releasedir(const char *path, struct fuse_file_info *fi) return ret; } + if (LXCFS_TYPE_LXCFS(type)) { + up_users(); + ret = do_lxcfsctl_releasedir(path, fi); + down_users(); + return ret; + } + if (path) { if (strcmp(path, "/") == 0) return 0; @@ -577,6 +623,13 @@ static int lxcfs_open(const char *path, struct fuse_file_info *fi) return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_open(path, fi); + down_users(); + return ret; + } + return -EACCES; } @@ -609,6 +662,13 @@ static int lxcfs_read(const char *path, char *buf, size_t size, off_t offset, return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_read(path, buf, size, offset, fi); + down_users(); + return ret; + } + lxcfs_error("unknown file type: path=%s, type=%d, fi->fh=%" PRIu64, path, type, fi->fh); @@ -637,6 +697,13 @@ int lxcfs_write(const char *path, const char *buf, size_t size, off_t offset, return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_write(path, buf, size, offset, fi); + down_users(); + return ret; + } + return -EINVAL; } @@ -651,6 +718,13 @@ int lxcfs_readlink(const char *path, char *buf, size_t size) return ret; } + if (strncmp(path, "/lxcfs", 6) == 0) { + up_users(); + ret = do_lxcfsctl_readlink(path, buf, size); + down_users(); + return ret; + } + return -EINVAL; } @@ -687,6 +761,13 @@ static int lxcfs_release(const char *path, struct fuse_file_info *fi) return ret; } + if (LXCFS_TYPE_LXCFS(type)) { + up_users(); + ret = do_lxcfsctl_release(path, fi); + down_users(); + return ret; + } + lxcfs_error("unknown file type: path=%s, type=%d, fi->fh=%" PRIu64, path, type, fi->fh); @@ -733,6 +814,9 @@ int lxcfs_chown(const char *path, uid_t uid, gid_t gid) if (strncmp(path, "/sys", 4) == 0) return -EPERM; + if (strncmp(path, "/lxcfs", 6) == 0) + return -EPERM; + return -ENOENT; } @@ -753,6 +837,9 @@ int lxcfs_truncate(const char *path, off_t newsize) if (strncmp(path, "/sys", 4) == 0) 
return 0; + if (strncmp(path, "/lxcfs", 6) == 0) + return 0; + return -EPERM; } @@ -791,6 +878,9 @@ int lxcfs_chmod(const char *path, mode_t mode) if (strncmp(path, "/sys", 4) == 0) return -EPERM; + if (strncmp(path, "/lxcfs", 6) == 0) + return -EPERM; + return -ENOENT; } From b06c5717aa25b3059383cb72c155ae6c4794e700 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Sun, 28 Apr 2024 22:05:34 +0200 Subject: [PATCH 15/17] src/bindings: add features list Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 4 ++++ src/bindings.h | 22 ++++++++++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/bindings.c b/src/bindings.c index d85d424d..752114a5 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -45,6 +45,10 @@ #define BASEDIR "/lxcfs/controllers" #define ROOTDIR "/lxcfs/root" +feature_t per_instance_features[63] = { + { } +}; + static bool can_use_pidfd; static bool can_use_swap; static bool can_use_sys_cpu; diff --git a/src/bindings.h b/src/bindings.h index e8b86b5c..812b0e03 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -100,16 +100,30 @@ extern int rwlock_rdlock_interruptible(pthread_rwlock_t *l); extern int rwlock_wrlock_interruptible(pthread_rwlock_t *l); struct file_info { - char *controller; - char *cgroup; - char *file; + union { + struct { + char *controller; + char *cgroup; + char *file; + }; + struct { + void *private_data; + }; + }; + int type; char *buf; /* unused */ int buflen; - int size; /*actual data size */ + int size; /* actual data size */ int cached; }; +typedef struct feature { + char *name; +} feature_t; + +extern feature_t per_instance_features[]; + enum lxcfs_feature_op { LXCFS_FEATURE_CHECK, LXCFS_FEATURE_SET, From dca286f21e1b79a9c50ff6f532d5beaabfaa71e8 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Sun, 28 Apr 2024 22:05:51 +0200 Subject: [PATCH 16/17] proc_fuse: add disable_uptime feature Signed-off-by: Alexander Mikhalitsyn --- src/bindings.c | 1 + src/bindings.h | 1 + 
src/proc_fuse.c | 3 +++ 3 files changed, 5 insertions(+) diff --git a/src/bindings.c b/src/bindings.c index 752114a5..e3f9d611 100644 --- a/src/bindings.c +++ b/src/bindings.c @@ -46,6 +46,7 @@ #define ROOTDIR "/lxcfs/root" feature_t per_instance_features[63] = { + { .name = "disable_uptime", }, { } }; diff --git a/src/bindings.h b/src/bindings.h index 812b0e03..3b6795f6 100644 --- a/src/bindings.h +++ b/src/bindings.h @@ -162,6 +162,7 @@ struct pidns_store { /* Do not free on liblxcfs reload (contains useful persistent data) */ bool keep_on_reload; +#define LXCFS_FEATURES_DISABLE_UPTIME (1 << 0) /* bit mask for per-instance configuration options (on/off) */ __u64 features; }; diff --git a/src/proc_fuse.c b/src/proc_fuse.c index f48c40b1..f759b756 100644 --- a/src/proc_fuse.c +++ b/src/proc_fuse.c @@ -948,6 +948,9 @@ static int proc_uptime_read(char *buf, size_t size, off_t offset, return total_len; } + if (check_set_lxcfs_feature(fc->pid, LXCFS_FEATURE_CHECK, LXCFS_FEATURES_DISABLE_UPTIME)) + return read_file_fuse("/proc/uptime", buf, size, d); + reaperage = get_reaper_age(fc->pid); /* * To understand why this is done, please read the comment to the From b4a17e3ae223a424cc4ee882f832de9e2019b206 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Wed, 24 Apr 2024 18:50:46 +0200 Subject: [PATCH 17/17] src/lxcfsctl_fuse: add implementation of lxcfs config subtree Signed-off-by: Alexander Mikhalitsyn --- meson.build | 2 + src/bindings.h | 6 +- src/lxcfsctl_fuse.c | 474 ++++++++++++++++++++++++++++++++++++++++++++ src/lxcfsctl_fuse.h | 29 +++ 4 files changed, 510 insertions(+), 1 deletion(-) create mode 100644 src/lxcfsctl_fuse.c create mode 100644 src/lxcfsctl_fuse.h diff --git a/meson.build b/meson.build index 68787738..d554f362 100644 --- a/meson.build +++ b/meson.build @@ -222,6 +222,8 @@ liblxcfs_sources = files( 'src/syscall_numbers.h', 'src/sysfs_fuse.c', 'src/sysfs_fuse.h', + 'src/lxcfsctl_fuse.c', + 'src/lxcfsctl_fuse.h', 'src/utils.c', 'src/utils.h') 
diff --git a/src/bindings.h b/src/bindings.h
index 3b6795f6..e09b050c 100644
--- a/src/bindings.h
+++ b/src/bindings.h
@@ -69,7 +69,11 @@ enum lxcfs_virt_t {
 #define LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH "/sys/devices/system/cpu/online"
 
 	LXC_TYPE_LXCFS,
-
+	LXC_TYPE_LXCFS_PIDNS_SUBDIR,
+	LXC_TYPE_LXCFS_PIDNS_PID_SUBDIR,
+	LXC_TYPE_LXCFS_PIDNS_CURPID_SUBDIR,
+	LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_SUBDIR,
+	LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE,
 	LXC_TYPE_MAX,
 };
 
diff --git a/src/lxcfsctl_fuse.c b/src/lxcfsctl_fuse.c
new file mode 100644
index 00000000..36833651
--- /dev/null
+++ b/src/lxcfsctl_fuse.c
@@ -0,0 +1,519 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#include "config.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "sysfs_fuse.h"
+
+#include "bindings.h"
+#include "memory_utils.h"
+#include "cgroups/cgroup.h"
+#include "lxcfs_fuse_compat.h"
+#include "utils.h"
+
+/* Result of resolving a path below /lxcfs to a virtual entry. */
+typedef struct lxcfsctl_dentry_data {
+	int type;	/* one of the LXC_TYPE_LXCFS* values */
+	pid_t initpid;	/* pid from the path, or the caller's pid for "current" */
+	int feature;	/* index into per_instance_features[] */
+} lxcfsctl_dentry_data_t;
+
+/*
+ * Resolve @path (rooted at /lxcfs) into an entry type and, for per-pidns
+ * entries, the pid and feature index. The result is written to @data, which
+ * is zeroed first.
+ *
+ * Returns 0 on success and -ENOENT when the path does not name a known entry
+ * or a numeric pid path is requested without passing the capability check.
+ */
+static int lxcfsctl_get_dentry_type(const char *path, lxcfsctl_dentry_data_t *data)
+{
+	struct fuse_context *fc = fuse_get_context();
+
+	memset(data, 0, sizeof(*data));
+
+	if (strcmp(path, "/lxcfs") == 0) {
+		data->type = LXC_TYPE_LXCFS;
+
+		return 0;
+	} else if (strcmp(path, "/lxcfs/pidns") == 0) {
+		data->type = LXC_TYPE_LXCFS_PIDNS_SUBDIR;
+
+		return 0;
+	} else if (strncmp(path, "/lxcfs/pidns/", STRLITERALLEN("/lxcfs/pidns/")) == 0) {
+		unsigned int pid;
+		char subpathbuf[101] = { 0 };
+		const char *subpath;
+
+		if (strncmp(path, "/lxcfs/pidns/current", STRLITERALLEN("/lxcfs/pidns/current")) == 0) {
+			pid = fc->pid;
+			subpath = path + STRLITERALLEN("/lxcfs/pidns/current");
+		} else {
+			int i;
+
+			/* this path is only allowed for host admin */
+			if (!proc_has_capability_in(getpid(), fc->pid, CAP_SYS_ADMIN))
+				return -ENOENT;
+
+			i = sscanf(path, "/lxcfs/pidns/%u%100s", &pid, subpathbuf);
+			if (i < 1)
+				return -ENOENT;
+			subpathbuf[sizeof(subpathbuf)-1] = '\0';
+			subpath = subpathbuf;
+		}
+
+		if (lookup_initpid_in_store(pid) < 0)
+			return -ENOENT;
+
+		/* NOTE(review): keeps the requested pid, not the lookup result - confirm. */
+		data->initpid = pid;
+
+		if (strlen(subpath) == 0) {
+			data->type = LXC_TYPE_LXCFS_PIDNS_PID_SUBDIR;
+
+			return 0;
+		} else if (strcmp(subpath, "/features") == 0) {
+			data->type = LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_SUBDIR;
+
+			return 0;
+		} else if (strncmp(subpath, "/features/", STRLITERALLEN("/features/")) == 0) {
+			const char *feature = subpath + STRLITERALLEN("/features/");
+
+			for (int i = 0; per_instance_features[i].name; i++) {
+				if (strcmp(per_instance_features[i].name, feature) == 0) {
+					data->type = LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE;
+					data->feature = i;
+					return 0;
+				}
+			}
+
+			return -ENOENT;
+		}
+	}
+
+	return -ENOENT;
+}
+
+/*
+ * Fill @sb for @path: directories are reported as 0555 with two links,
+ * feature files as 0444 regular files. Returns -ENOENT for unknown paths.
+ */
+__lxcfs_fuse_ops int lxcfsctl_getattr(const char *path, struct stat *sb)
+{
+	lxcfsctl_dentry_data_t d_data;
+	struct timespec now;
+
+	memset(sb, 0, sizeof(struct stat));
+	if (clock_gettime(CLOCK_REALTIME, &now) < 0)
+		return -EINVAL;
+
+	sb->st_uid = sb->st_gid = 0;
+	sb->st_atim = sb->st_mtim = sb->st_ctim = now;
+
+	if (lxcfsctl_get_dentry_type(path, &d_data))
+		return -ENOENT;
+
+	switch (d_data.type) {
+	case LXC_TYPE_LXCFS:
+	case LXC_TYPE_LXCFS_PIDNS_SUBDIR:
+	case LXC_TYPE_LXCFS_PIDNS_PID_SUBDIR:
+	case LXC_TYPE_LXCFS_PIDNS_CURPID_SUBDIR:
+	case LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_SUBDIR:
+		sb->st_mode = S_IFDIR | 00555;
+		sb->st_nlink = 2;
+
+		return 0;
+	case LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE:
+		sb->st_size = 1;
+
+		sb->st_mode = S_IFREG | 00444;
+		sb->st_nlink = 1;
+		return 0;
+	}
+
+	return -ENOENT;
+}
+
+/* Emit one directory entry per name in per_instance_features[]. */
+static int lxcfs_features_dir_filler(fuse_fill_dir_t filler, void *buf)
+{
+	for (int i = 0; per_instance_features[i].name; i++) {
+		if (dir_filler(filler, buf, per_instance_features[i].name, 0) != 0)
+			return -ENOENT;
+	}
+
+	return 0;
+}
+
+/* Readdir state handed to the iter_initpid_store() callback. */
+struct pidns_iter_filler_args {
+	fuse_fill_dir_t filler;
+	void *buf;
+};
+
+/* iter_initpid_store() callback: emit @cur->initpid as a directory name. */
+static int features_dir_filler(struct pidns_store *cur, void *data)
+{
+	struct pidns_iter_filler_args *args = data;
+	char dname[INTTYPE_TO_STRLEN(typeof(cur->initpid))];
+
+	snprintf(dname, sizeof(dname), "%d", cur->initpid);
+
+	if (dir_filler(args->filler, args->buf, dname, 0) != 0)
+		return -ENOENT;
+
+	return 0;
+}
+
+/* Emit one directory entry per element visited by iter_initpid_store(). */
+static int lxcfs_pidns_dir_filler(fuse_fill_dir_t filler, void *buf)
+{
+	struct pidns_iter_filler_args args = {
+		.filler = filler,
+		.buf = buf,
+	};
+
+	return iter_initpid_store(features_dir_filler, &args);
+}
+
+/*
+ * List the directory opened by lxcfsctl_opendir(); the entry type is taken
+ * from the file_info stored in @fi->fh. Returns -EINVAL for other types.
+ */
+__lxcfs_fuse_ops int lxcfsctl_readdir(const char *path, void *buf,
+				      fuse_fill_dir_t filler, off_t offset,
+				      struct fuse_file_info *fi)
+{
+	struct fuse_context *fc = fuse_get_context();
+	struct file_info *f = INTTYPE_TO_PTR(fi->fh);
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (!f)
+		return -EIO;
+
+	switch (f->type) {
+	case LXC_TYPE_LXCFS:
+		if (dir_filler(filler, buf, ".", 0) != 0 ||
+		    dir_filler(filler, buf, "..", 0) != 0 ||
+		    dir_filler(filler, buf, "pidns", 0) != 0)
+			return -ENOENT;
+		return 0;
+	case LXC_TYPE_LXCFS_PIDNS_SUBDIR:
+		if (dir_filler(filler, buf, ".", 0) != 0 ||
+		    dir_filler(filler, buf, "..", 0) != 0 ||
+		    dir_filler(filler, buf, "current", 0) != 0)
+			return -ENOENT;
+
+		/* show all pid namespaces for the host admin */
+		if (proc_has_capability_in(getpid(), fc->pid, CAP_SYS_ADMIN) &&
+		    lxcfs_pidns_dir_filler(filler, buf))
+			return -ENOENT;
+
+		return 0;
+	case LXC_TYPE_LXCFS_PIDNS_PID_SUBDIR:
+	case LXC_TYPE_LXCFS_PIDNS_CURPID_SUBDIR:
+		if (dir_filler(filler, buf, ".", 0) != 0 ||
+		    dir_filler(filler, buf, "..", 0) != 0 ||
+		    dir_filler(filler, buf, "features", 0) != 0)
+			return -ENOENT;
+		return 0;
+	case LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_SUBDIR:
+		if (dir_filler(filler, buf, ".", 0) != 0 ||
+		    dir_filler(filler, buf, "..", 0) != 0)
+			return -ENOENT;
+		return lxcfs_features_dir_filler(filler, buf);
+	}
+
+	return -EINVAL;
+}
+
+/* Per-open state of a feature file, stored in file_info->private_data. */
+typedef struct lxcfsctl_file_data {
+	/* increase version if the structure was changed */
+	__u16 version;
+
+	pid_t initpid;
+	int feature;
+} lxcfsctl_file_data_t;
+
+/*
+ * Open @path and attach a zeroed read buffer and a lxcfsctl_file_data_t to
+ * @fi->fh. Returns -EACCES unless proc_has_capability_in() succeeds for
+ * CAP_SYS_ADMIN with either the resolved pid or getpid() as first argument.
+ */
+__lxcfs_fuse_ops int lxcfsctl_open(const char *path, struct fuse_file_info *fi)
+{
+	struct fuse_context *fc = fuse_get_context();
+	__do_free struct file_info *info = NULL;
+	int type = -1;
+	__do_free lxcfsctl_file_data_t *private_data = NULL;
+	lxcfsctl_dentry_data_t d_data;
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (lxcfsctl_get_dentry_type(path, &d_data))
+		return -ENOENT;
+
+	if (!proc_has_capability_in(d_data.initpid, fc->pid, CAP_SYS_ADMIN) &&
+	    !proc_has_capability_in(getpid(), fc->pid, CAP_SYS_ADMIN))
+		return -EACCES;
+
+	type = d_data.type;
+	if (type == -1)
+		return -ENOENT;
+
+	private_data = zalloc(sizeof(*private_data));
+	if (!private_data)
+		return -ENOMEM;
+
+	private_data->version = 1;
+	private_data->initpid = d_data.initpid;
+	private_data->feature = d_data.feature;
+
+	info = zalloc(sizeof(*info));
+	if (!info)
+		return -ENOMEM;
+
+	info->type = type;
+	info->buflen = BUF_RESERVE_SIZE;
+
+	info->buf = malloc(info->buflen);
+	if (!info->buf)
+		return -ENOMEM;
+
+	memset(info->buf, 0, info->buflen);
+	/* set actual size to buffer size */
+	info->size = info->buflen;
+
+	info->private_data = move_ptr(private_data);
+
+	fi->fh = PTR_TO_UINT64(move_ptr(info));
+	return 0;
+}
+
+/* Drop the per-open state created by lxcfsctl_open(). */
+__lxcfs_fuse_ops int lxcfsctl_release(const char *path, struct fuse_file_info *fi)
+{
+	struct file_info *f;
+
+	f = INTTYPE_TO_PTR(fi->fh);
+	if (!f)
+		return 0;
+
+	/* free lxcfsctl_file_data_t */
+	free_disarm(f->private_data);
+
+	do_release_file_info(fi);
+	return 0;
+}
+
+/* Resolve @path and remember its entry type in @fi->fh for readdir. */
+__lxcfs_fuse_ops int lxcfsctl_opendir(const char *path, struct fuse_file_info *fi)
+{
+	__do_free struct file_info *dir_info = NULL;
+	int type = -1;
+	lxcfsctl_dentry_data_t d_data;
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (lxcfsctl_get_dentry_type(path, &d_data))
+		return -ENOENT;
+
+	type = d_data.type;
+
+	if (type == -1)
+		return -ENOENT;
+
+	dir_info = zalloc(sizeof(*dir_info));
+	if (!dir_info)
+		return -ENOMEM;
+
+	dir_info->type = type;
+	dir_info->buf = NULL;
+	dir_info->file = NULL;
+	dir_info->buflen = 0;
+
+	fi->fh = PTR_TO_UINT64(move_ptr(dir_info));
+	return 0;
+}
+
+/* Release the file_info allocated by lxcfsctl_opendir(). */
+__lxcfs_fuse_ops int lxcfsctl_releasedir(const char *path, struct fuse_file_info *fi)
+{
+	do_release_file_info(fi);
+	return 0;
+}
+
+/*
+ * Read the state of one feature as "0\n" or "1\n". The text is generated on
+ * the read at offset 0 and served from the cached buffer afterwards.
+ */
+static int lxcfsctl_read_feature(char *buf, size_t size, off_t offset,
+				 struct fuse_file_info *fi)
+{
+	struct file_info *d = INTTYPE_TO_PTR(fi->fh);
+	char *cache = d->buf;
+	ssize_t total_len = 0, ret = 0;
+	lxcfsctl_file_data_t *d_data = d->private_data;
+	bool state;
+
+	if (offset) {
+		size_t left;
+
+		if (offset > d->size)
+			return -EINVAL;
+
+		if (!d->cached)
+			return 0;
+
+		left = d->size - offset;
+		total_len = left > size ? size : left;
+		memcpy(buf, cache + offset, total_len);
+
+		return total_len;
+	}
+
+	if (!d_data)
+		return -EIO;
+
+	state = check_set_lxcfs_feature(d_data->initpid, LXCFS_FEATURE_CHECK, (1 << d_data->feature));
+
+	ret = snprintf(d->buf, d->buflen, "%d\n", state);
+	if (ret < 0 || ret >= d->buflen)
+		return -EIO;
+	total_len = ret;
+
+	d->cached = 1;
+	d->size = total_len;
+	if ((size_t)total_len > size)
+		total_len = size;
+	memcpy(buf, d->buf, total_len);
+
+	return total_len;
+}
+
+/*
+ * Set or clear one feature: a leading '1' sets it, a leading '0' clears it,
+ * anything else is -EINVAL. Returns @size on success.
+ */
+static int lxcfsctl_write_feature(const char *buf, size_t size,
+				  off_t offset, struct fuse_file_info *fi)
+{
+	struct file_info *f = INTTYPE_TO_PTR(fi->fh);
+	lxcfsctl_file_data_t *d_data;
+
+	if (!size)
+		return -EINVAL;
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (!f)
+		return -EIO;
+
+	if (f->type != LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE)
+		return -EINVAL;
+
+	d_data = f->private_data;
+	if (!d_data)
+		return -EIO;
+
+	if (buf[0] == '0') {
+		if (!check_set_lxcfs_feature(d_data->initpid, LXCFS_FEATURE_CLEAR, (1 << d_data->feature)))
+			return -EIO;
+
+		return size;
+	} else if (buf[0] == '1') {
+		if (!check_set_lxcfs_feature(d_data->initpid, LXCFS_FEATURE_SET, (1 << d_data->feature)))
+			return -EIO;
+
+		return size;
+	}
+
+	return -EINVAL;
+}
+
+/* Dispatch a write on an open lxcfsctl file according to its entry type. */
+__lxcfs_fuse_ops int lxcfsctl_write(const char *path, const char *buf, size_t size,
+				    off_t offset, struct fuse_file_info *fi)
+{
+	struct file_info *f = INTTYPE_TO_PTR(fi->fh);
+
+	if (!size)
+		return -EINVAL;
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (!f)
+		return -EIO;
+
+	switch (f->type) {
+	case LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE:
+		return lxcfsctl_write_feature(buf, size, offset, fi);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+/* Dispatch a read on an open lxcfsctl file according to its entry type. */
+__lxcfs_fuse_ops int lxcfsctl_read(const char *path, char *buf, size_t size,
+				   off_t offset, struct fuse_file_info *fi)
+{
+	struct file_info *f = INTTYPE_TO_PTR(fi->fh);
+
+	if (!liblxcfs_functional())
+		return -EIO;
+
+	if (!f)
+		return -EIO;
+
+	switch (f->type) {
+	case LXC_TYPE_LXCFS_PIDNS_PID_FEATURES_F_SUBFILE:
+		return lxcfsctl_read_feature(buf, size, offset, fi);
+	}
+
+	return -EOPNOTSUPP;
+}
+
+/* Not supported for lxcfsctl entries. */
+__lxcfs_fuse_ops int lxcfsctl_access(const char *path, int mask)
+{
+	return -EOPNOTSUPP;
+}
+
+/* Not supported for lxcfsctl entries. */
+__lxcfs_fuse_ops int lxcfsctl_readlink(const char *path, char *buf, size_t size)
+{
+	return -EOPNOTSUPP;
+}
diff --git a/src/lxcfsctl_fuse.h b/src/lxcfsctl_fuse.h
new file mode 100644
index 00000000..bb3d6d41
--- /dev/null
+++ b/src/lxcfsctl_fuse.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
+
+#ifndef __LXCFSCTL_FUSE_H
+#define __LXCFSCTL_FUSE_H
+
+#include "config.h"
+
+#include
+#include
+#include
+#include
+#include
+
+#include "lxcfs_fuse.h"
+
+#include "macro.h"
+
+__visible extern int lxcfsctl_getattr(const char *path, struct stat *sb);
+__visible extern int lxcfsctl_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_readlink(const char *path, char *buf, size_t size);
+__visible extern int lxcfsctl_release(const char *path, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_releasedir(const char *path, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_open(const char *path, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_opendir(const char *path, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_read(const char *path, char *buf, size_t size, off_t offset, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_write(const char *path, const char *buf, size_t size, off_t offset, struct fuse_file_info *fi);
+__visible extern int lxcfsctl_access(const char *path, int mask);
+
+#endif /* __LXCFSCTL_FUSE_H */