From 3a07ba795c8f3257f6d814812427e106539c35a3 Mon Sep 17 00:00:00 2001 From: Christian Duerr Date: Thu, 28 Sep 2023 23:46:18 +0200 Subject: [PATCH 1/2] Remove landlock This patch completely removes landlock from the Linux sandbox. While landlock provides solid filesystem isolation even without requiring any intricate Linux sandboxing knowledge, it is currently still too limited to build a "bulletproof" filesystem sandbox. As such, it is better suited for best-effort isolation of "assumed safe" applications, rather than sandboxing of "potentially hazardous" software. For Birdcage, these are the biggest limitations: - Locking down sockets/pipes - High kernel requirement for all basic features (FS_TRUNCATE is 6.2) --- Cargo.toml | 6 -- README.md | 4 +- examples/sandbox.rs | 2 +- src/error.rs | 26 ------- src/lib.rs | 4 +- src/linux/mod.rs | 117 ++++++++------------------------ src/linux/namespaces.rs | 14 ++-- src/macos.rs | 4 +- tests/canonicalize.rs | 2 +- tests/consistent_id_mappings.rs | 2 +- tests/env.rs | 2 +- tests/exec.rs | 2 +- tests/fs.rs | 2 +- tests/fs_without_landlock.rs | 58 ---------------- tests/full_env.rs | 2 +- tests/full_sandbox.rs | 2 +- tests/net.rs | 2 +- tests/net_without_namespaces.rs | 2 +- tests/net_without_seccomp.rs | 2 +- 19 files changed, 53 insertions(+), 202 deletions(-) delete mode 100644 tests/fs_without_landlock.rs diff --git a/Cargo.toml b/Cargo.toml index 5da897d..de6c78c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,11 +29,6 @@ name = "fs" path = "tests/fs.rs" harness = false -[[test]] -name = "fs_without_landlock" -path = "tests/fs_without_landlock.rs" -harness = false - [[test]] name = "full_env" path = "tests/full_env.rs" @@ -66,7 +61,6 @@ harness = false [target.'cfg(target_os = "linux")'.dependencies] seccompiler = "0.2.0" -landlock = "0.2.0" libc = "0.2.132" [dev-dependencies] diff --git a/README.md b/README.md index c38d2d0..3d03fb1 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,8 @@ use the example. 
## Supported Platforms - - Linux (5.13+) via [Landlock] and [seccomp] + - Linux via [namespaces] and [seccomp] - macOS via `sandbox_init()` (aka Seatbelt) -[landlock]: https://www.kernel.org/doc/html/latest/userspace-api/landlock.html +[namespaces]: https://man7.org/linux/man-pages/man7/namespaces.7.html [seccomp]: https://man7.org/linux/man-pages/man2/seccomp.2.html diff --git a/examples/sandbox.rs b/examples/sandbox.rs index 7a4f97f..dcfc50d 100644 --- a/examples/sandbox.rs +++ b/examples/sandbox.rs @@ -42,7 +42,7 @@ fn main() -> Result<(), Box> { let cli = Cli::parse(); // Setup sandbox and its exceptions. - let mut birdcage = Birdcage::new()?; + let mut birdcage = Birdcage::new(); for path in cli.allow_read { birdcage.add_exception(Exception::Read(path))?; diff --git a/src/error.rs b/src/error.rs index 6873092..08fa965 100644 --- a/src/error.rs +++ b/src/error.rs @@ -7,8 +7,6 @@ use std::fmt::{self, Display, Formatter}; use std::io::Error as IoError; use std::result::Result as StdResult; -#[cfg(target_os = "linux")] -use landlock::{PathFdError, RulesetError}; #[cfg(target_os = "linux")] use seccompiler::{BackendError, Error as SeccompError}; @@ -18,17 +16,11 @@ pub type Result = StdResult; /// Sandboxing error. #[derive(Debug)] pub enum Error { - /// Landlock ruleset creation/modification error. - #[cfg(target_os = "linux")] - Ruleset(RulesetError), - /// Seccomp errors. #[cfg(target_os = "linux")] Seccomp(SeccompError), /// Invalid sandbox exception path. 
- #[cfg(target_os = "linux")] - InvalidPath(PathFdError), #[cfg(target_os = "macos")] InvalidPath(InvalidPathError), @@ -44,12 +36,8 @@ impl StdError for Error {} impl Display for Error { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { - #[cfg(target_os = "linux")] - Self::Ruleset(error) => write!(f, "landlock ruleset error: {error}"), #[cfg(target_os = "linux")] Self::Seccomp(error) => write!(f, "seccomp error: {error}"), - #[cfg(target_os = "linux")] - Self::InvalidPath(error) => write!(f, "invalid path: {error}"), #[cfg(target_os = "macos")] Self::InvalidPath(error) => write!(f, "invalid path: {error:?}"), Self::Io(error) => write!(f, "input/output error: {error}"), @@ -60,13 +48,6 @@ impl Display for Error { } } -#[cfg(target_os = "linux")] -impl From for Error { - fn from(error: RulesetError) -> Self { - Self::Ruleset(error) - } -} - #[cfg(target_os = "linux")] impl From for Error { fn from(error: SeccompError) -> Self { @@ -81,13 +62,6 @@ impl From for Error { } } -#[cfg(target_os = "linux")] -impl From for Error { - fn from(error: PathFdError) -> Self { - Self::InvalidPath(error) - } -} - #[cfg(target_os = "macos")] impl From for Error { fn from(error: InvalidPathError) -> Self { diff --git a/src/lib.rs b/src/lib.rs index 5db6fa4..f52c02c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,7 +18,7 @@ //! fs::read_to_string(file.path()).unwrap(); //! //! // Initialize the sandbox; by default everything is prohibited. -//! Birdcage::new().unwrap().lock().unwrap(); +//! Birdcage::new().lock().unwrap(); //! //! // Reads with sandbox should fail. //! let result = fs::read_to_string(file.path()); @@ -56,7 +56,7 @@ pub type Birdcage = MacSandbox; pub trait Sandbox: Sized { /// Setup the sandboxing environment. - fn new() -> Result; + fn new() -> Self; /// Add a new exception to the sandbox. /// diff --git a/src/linux/mod.rs b/src/linux/mod.rs index a9b8525..be0076c 100644 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -1,17 +1,10 @@ //! 
Linux sandboxing. -//! -//! This module implements sandboxing on Linux based on the Landlock LSM, -//! namespaces, and seccomp. use std::collections::HashMap; +use std::io::Error as IoError; use std::path::PathBuf; -use landlock::{ - make_bitflags, Access, AccessFs, Compatible, PathBeneath, PathFd, Ruleset, RulesetAttr, - RulesetCreated, RulesetCreatedAttr, RulesetStatus, ABI as LANDLOCK_ABI, -}; - -use crate::error::{Error, Result}; +use crate::error::Result; use crate::linux::namespaces::MountFlags; use crate::linux::seccomp::NetworkFilter; use crate::{Exception, Sandbox}; @@ -19,14 +12,11 @@ use crate::{Exception, Sandbox}; mod namespaces; mod seccomp; -/// Minimum landlock ABI version. -const ABI: LANDLOCK_ABI = LANDLOCK_ABI::V1; - /// Linux sandboxing. +#[derive(Default)] pub struct LinuxSandbox { bind_mounts: HashMap, env_exceptions: Vec, - landlock: RulesetCreated, allow_networking: bool, full_env: bool, } @@ -54,63 +44,19 @@ impl LinuxSandbox { } impl Sandbox for LinuxSandbox { - fn new() -> Result { - // Setup landlock filtering. - let mut landlock = Ruleset::new() - .set_best_effort(false) - .handle_access(AccessFs::from_all(ABI))? 
- .create()?; - landlock.as_mut().set_no_new_privs(true); - - Ok(Self { - landlock, - allow_networking: false, - full_env: false, - env_exceptions: Default::default(), - bind_mounts: Default::default(), - }) + fn new() -> Self { + Self::default() } fn add_exception(&mut self, exception: Exception) -> Result<&mut Self> { - let (path_fd, access) = match exception { - Exception::Read(path) => { - let path_fd = PathFd::new(&path)?; - - self.update_bind_mount(path, false, false); - - (path_fd, make_bitflags!(AccessFs::{ ReadFile | ReadDir })) - }, - Exception::Write(path) => { - let path_fd = PathFd::new(&path)?; - - self.update_bind_mount(path, true, false); - - (path_fd, AccessFs::from_write(ABI)) - }, - Exception::ExecuteAndRead(path) => { - let path_fd = PathFd::new(&path)?; - - self.update_bind_mount(path, false, true); - - (path_fd, AccessFs::from_read(ABI)) - }, - Exception::Environment(key) => { - self.env_exceptions.push(key); - return Ok(self); - }, - Exception::FullEnvironment => { - self.full_env = true; - return Ok(self); - }, - Exception::Networking => { - self.allow_networking = true; - return Ok(self); - }, - }; - - let rule = PathBeneath::new(path_fd, access); - - self.landlock.as_mut().add_rule(rule)?; + match exception { + Exception::Read(path) => self.update_bind_mount(path, false, false), + Exception::Write(path) => self.update_bind_mount(path, true, false), + Exception::ExecuteAndRead(path) => self.update_bind_mount(path, false, true), + Exception::Environment(key) => self.env_exceptions.push(key), + Exception::FullEnvironment => self.full_env = true, + Exception::Networking => self.allow_networking = true, + } Ok(self) } @@ -122,32 +68,29 @@ impl Sandbox for LinuxSandbox { } // Setup namespaces. - let namespace_result = - namespaces::create_namespaces(self.allow_networking, self.bind_mounts); + namespaces::create_namespaces(self.allow_networking, self.bind_mounts)?; // Setup seccomp network filter. 
if !self.allow_networking { - let seccomp_result = NetworkFilter::apply(); - - // Propagate failure if neither seccomp nor namespaces could isolate networking. - if let (Err(_), Err(err)) = (&namespace_result, seccomp_result) { - return Err(err); - } + let _ = NetworkFilter::apply(); } - // Use landlock only if namespaces failed. - if namespace_result.is_ok() { - return Ok(()); - } + // Block suid/sgid. + // + // This is also blocked by our bind mount's MS_NOSUID flag, so we're just + // doubling-down here. + no_new_privs()?; - // Apply landlock rules. - let status = self.landlock.restrict_self()?; + Ok(()) + } +} - // Ensure all restrictions were properly applied. - if status.no_new_privs && status.ruleset == RulesetStatus::FullyEnforced { - Ok(()) - } else { - Err(Error::ActivationFailed("sandbox could not be fully enforced".into())) - } +/// Prevent suid/sgid. +fn no_new_privs() -> Result<()> { + let result = unsafe { libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) }; + + match result { + 0 => Ok(()), + _ => Err(IoError::last_os_error().into()), } } diff --git a/src/linux/namespaces.rs b/src/linux/namespaces.rs index 46bb732..96b43aa 100644 --- a/src/linux/namespaces.rs +++ b/src/linux/namespaces.rs @@ -252,10 +252,9 @@ fn pivot_root(new_root: &CStr, put_old: &CStr) -> Result<()> { fn umount(target: &CStr) -> Result<()> { let result = unsafe { libc::umount2(target.as_ptr(), libc::MNT_DETACH) }; - if result == 0 { - Ok(()) - } else { - Err(IoError::last_os_error().into()) + match result { + 0 => Ok(()), + _ => Err(IoError::last_os_error().into()), } } @@ -288,10 +287,9 @@ fn create_user_namespace( /// Enter a namespace. 
fn unshare(namespaces: Namespaces) -> Result<()> { let result = unsafe { libc::unshare(namespaces.bits()) }; - if result == 0 { - Ok(()) - } else { - Err(IoError::last_os_error().into()) + match result { + 0 => Ok(()), + _ => Err(IoError::last_os_error().into()), } } diff --git a/src/macos.rs b/src/macos.rs index 515dba1..d580922 100644 --- a/src/macos.rs +++ b/src/macos.rs @@ -35,8 +35,8 @@ pub struct MacSandbox { } impl Sandbox for MacSandbox { - fn new() -> Result { - Ok(Self { profile: DEFAULT_RULE.to_vec(), env_exceptions: Vec::new(), full_env: false }) + fn new() -> Self { + Self { profile: DEFAULT_RULE.to_vec(), env_exceptions: Vec::new(), full_env: false } } fn add_exception(&mut self, exception: Exception) -> Result<&mut Self> { diff --git a/tests/canonicalize.rs b/tests/canonicalize.rs index 5b7772a..9f985d0 100644 --- a/tests/canonicalize.rs +++ b/tests/canonicalize.rs @@ -3,7 +3,7 @@ use std::fs; use birdcage::{Birdcage, Exception, Sandbox}; fn main() { - let mut birdcage = Birdcage::new().unwrap(); + let mut birdcage = Birdcage::new(); birdcage.add_exception(Exception::Read("./".into())).unwrap(); birdcage.lock().unwrap(); diff --git a/tests/consistent_id_mappings.rs b/tests/consistent_id_mappings.rs index b69c862..d47580a 100644 --- a/tests/consistent_id_mappings.rs +++ b/tests/consistent_id_mappings.rs @@ -8,7 +8,7 @@ fn main() { let euid = unsafe { libc::geteuid() }; let egid = unsafe { libc::getegid() }; - let birdcage = Birdcage::new().unwrap(); + let birdcage = Birdcage::new(); birdcage.lock().unwrap(); assert_eq!(uid, unsafe { libc::getuid() }); diff --git a/tests/env.rs b/tests/env.rs index 661785f..8ef1439 100644 --- a/tests/env.rs +++ b/tests/env.rs @@ -8,7 +8,7 @@ fn main() { env::set_var("PRIVATE", "BAD"); // Activate our sandbox. 
- let mut birdcage = Birdcage::new().unwrap(); + let mut birdcage = Birdcage::new(); birdcage.add_exception(Exception::Environment("PUBLIC".into())).unwrap(); birdcage.lock().unwrap(); diff --git a/tests/exec.rs b/tests/exec.rs index 22f5b8e..84fbb21 100644 --- a/tests/exec.rs +++ b/tests/exec.rs @@ -5,7 +5,7 @@ use std::process::Command; use birdcage::{Birdcage, Exception, Sandbox}; fn main() { - let mut birdcage = Birdcage::new().unwrap(); + let mut birdcage = Birdcage::new(); birdcage.add_exception(Exception::ExecuteAndRead("/usr/bin/true".into())).unwrap(); birdcage.add_exception(Exception::ExecuteAndRead("/usr/lib".into())).unwrap(); if PathBuf::from("/lib64").exists() { diff --git a/tests/fs.rs b/tests/fs.rs index dd8bd31..8b2ca3f 100644 --- a/tests/fs.rs +++ b/tests/fs.rs @@ -13,7 +13,7 @@ fn main() { fs::write(&public_path, FILE_CONTENT.as_bytes()).unwrap(); // Activate our sandbox. - let mut birdcage = Birdcage::new().unwrap(); + let mut birdcage = Birdcage::new(); birdcage.add_exception(Exception::Read(public_path.path().into())).unwrap(); birdcage.lock().unwrap(); diff --git a/tests/fs_without_landlock.rs b/tests/fs_without_landlock.rs deleted file mode 100644 index e7b7bb0..0000000 --- a/tests/fs_without_landlock.rs +++ /dev/null @@ -1,58 +0,0 @@ -#[cfg(target_os = "linux")] -use std::collections::BTreeMap; -#[cfg(target_os = "linux")] -use std::fs; - -#[cfg(target_os = "linux")] -use birdcage::{Birdcage, Exception, Sandbox}; -#[cfg(target_os = "linux")] -use seccompiler::{BpfProgram, SeccompAction, SeccompFilter, TargetArch}; -#[cfg(target_os = "linux")] -use tempfile::NamedTempFile; - -#[cfg(target_os = "linux")] -#[cfg(target_arch = "x86_64")] -const ARCH: TargetArch = TargetArch::x86_64; -#[cfg(target_os = "linux")] -#[cfg(target_arch = "aarch64")] -const ARCH: TargetArch = TargetArch::aarch64; - -#[cfg(target_os = "linux")] -fn main() { - const FILE_CONTENT: &str = "expected content"; - - // Create seccomp filter blocking `landlock_restrict_self` 
syscall. - let mut rules = BTreeMap::new(); - rules.insert(libc::SYS_landlock_restrict_self, Vec::new()); - let filter = SeccompFilter::new( - rules, - SeccompAction::Allow, - SeccompAction::Errno(libc::EACCES as u32), - ARCH, - ) - .unwrap(); - let program: BpfProgram = filter.try_into().unwrap(); - seccompiler::apply_filter(&program).unwrap(); - - // Setup our test files. - let private_path = NamedTempFile::new().unwrap(); - fs::write(&private_path, FILE_CONTENT.as_bytes()).unwrap(); - let public_path = NamedTempFile::new().unwrap(); - fs::write(&public_path, FILE_CONTENT.as_bytes()).unwrap(); - - // Activate our sandbox. - let mut birdcage = Birdcage::new().unwrap(); - birdcage.add_exception(Exception::Read(public_path.path().into())).unwrap(); - birdcage.lock().unwrap(); - - // Access to the public file is allowed. - let content = fs::read_to_string(public_path).unwrap(); - assert_eq!(content, FILE_CONTENT); - - // Access to the private file is prohibited. - let result = fs::read_to_string(private_path); - assert!(result.is_err()); -} - -#[cfg(not(target_os = "linux"))] -fn main() {} diff --git a/tests/full_env.rs b/tests/full_env.rs index 082ee15..ffb9c8d 100644 --- a/tests/full_env.rs +++ b/tests/full_env.rs @@ -7,7 +7,7 @@ fn main() { env::set_var("PUBLIC", "GOOD"); // Activate our sandbox. - let mut birdcage = Birdcage::new().unwrap(); + let mut birdcage = Birdcage::new(); birdcage.add_exception(Exception::FullEnvironment).unwrap(); birdcage.lock().unwrap(); diff --git a/tests/full_sandbox.rs b/tests/full_sandbox.rs index 3a4f328..e2a384d 100644 --- a/tests/full_sandbox.rs +++ b/tests/full_sandbox.rs @@ -32,7 +32,7 @@ fn main() { assert_eq!(env::var("TEST"), Ok("value".into())); // Activate our sandbox. - Birdcage::new().unwrap().lock().unwrap(); + Birdcage::new().lock().unwrap(); // Ensure sandboxed write is blocked. 
let result = fs::write(&path, b"x"); diff --git a/tests/net.rs b/tests/net.rs index 9933b8f..adb205e 100644 --- a/tests/net.rs +++ b/tests/net.rs @@ -3,7 +3,7 @@ use std::net::TcpStream; use birdcage::{Birdcage, Exception, Sandbox}; fn main() { - let mut birdcage = Birdcage::new().unwrap(); + let mut birdcage = Birdcage::new(); birdcage.add_exception(Exception::Networking).unwrap(); birdcage.lock().unwrap(); diff --git a/tests/net_without_namespaces.rs b/tests/net_without_namespaces.rs index 88dce6d..85f435c 100644 --- a/tests/net_without_namespaces.rs +++ b/tests/net_without_namespaces.rs @@ -30,7 +30,7 @@ fn main() { let program: BpfProgram = filter.try_into().unwrap(); seccompiler::apply_filter(&program).unwrap(); - let birdcage = Birdcage::new().unwrap(); + let birdcage = Birdcage::new(); let result = birdcage.lock(); // Seccomp isn't supported, so failure is desired. diff --git a/tests/net_without_seccomp.rs b/tests/net_without_seccomp.rs index 0be236e..14407f3 100644 --- a/tests/net_without_seccomp.rs +++ b/tests/net_without_seccomp.rs @@ -42,7 +42,7 @@ fn main() { let program: BpfProgram = filter.try_into().unwrap(); seccompiler::apply_filter(&program).unwrap(); - let birdcage = Birdcage::new().unwrap(); + let birdcage = Birdcage::new(); birdcage.lock().unwrap(); let result = TcpStream::connect("8.8.8.8:443"); From aab3123390fae5497b7b15c1dc14a39de4057def Mon Sep 17 00:00:00 2001 From: Christian Duerr Date: Fri, 29 Sep 2023 00:02:25 +0200 Subject: [PATCH 2/2] Remove seccomp This patch completely removes seccomp from the Linux sandbox. Currently the only usage of seccomp was to block system calls for network filtering. However, since the sandbox's namespaces already isolate networking, seccomp isn't necessary anymore. Seccomp could be useful in the future to limit some system calls that could cause undesired system changes, but these types of system calls usually require elevated permissions already. 
--- Cargo.toml | 11 - README.md | 3 +- src/error.rs | 23 -- src/linux/mod.rs | 7 - src/linux/seccomp.rs | 526 -------------------------------- tests/net_without_namespaces.rs | 47 --- tests/net_without_seccomp.rs | 53 ---- 7 files changed, 1 insertion(+), 669 deletions(-) delete mode 100644 src/linux/seccomp.rs delete mode 100644 tests/net_without_namespaces.rs delete mode 100644 tests/net_without_seccomp.rs diff --git a/Cargo.toml b/Cargo.toml index de6c78c..b5abad3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,23 +44,12 @@ name = "net" path = "tests/net.rs" harness = false -[[test]] -name = "net_without_seccomp" -path = "tests/net_without_seccomp.rs" -harness = false - -[[test]] -name = "net_without_namespaces" -path = "tests/net_without_namespaces.rs" -harness = false - [[test]] name = "consistent_id_mappings" path = "tests/consistent_id_mappings.rs" harness = false [target.'cfg(target_os = "linux")'.dependencies] -seccompiler = "0.2.0" libc = "0.2.132" [dev-dependencies] diff --git a/README.md b/README.md index 3d03fb1..3e8c9a7 100644 --- a/README.md +++ b/README.md @@ -63,8 +63,7 @@ use the example. ## Supported Platforms - - Linux via [namespaces] and [seccomp] + - Linux via [namespaces] - macOS via `sandbox_init()` (aka Seatbelt) [namespaces]: https://man7.org/linux/man-pages/man7/namespaces.7.html -[seccomp]: https://man7.org/linux/man-pages/man2/seccomp.2.html diff --git a/src/error.rs b/src/error.rs index 08fa965..022e3b0 100644 --- a/src/error.rs +++ b/src/error.rs @@ -7,19 +7,12 @@ use std::fmt::{self, Display, Formatter}; use std::io::Error as IoError; use std::result::Result as StdResult; -#[cfg(target_os = "linux")] -use seccompiler::{BackendError, Error as SeccompError}; - /// Birdcage result type. pub type Result = StdResult; /// Sandboxing error. #[derive(Debug)] pub enum Error { - /// Seccomp errors. - #[cfg(target_os = "linux")] - Seccomp(SeccompError), - /// Invalid sandbox exception path. 
#[cfg(target_os = "macos")] InvalidPath(InvalidPathError), @@ -36,8 +29,6 @@ impl StdError for Error {} impl Display for Error { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { - #[cfg(target_os = "linux")] - Self::Seccomp(error) => write!(f, "seccomp error: {error}"), #[cfg(target_os = "macos")] Self::InvalidPath(error) => write!(f, "invalid path: {error:?}"), Self::Io(error) => write!(f, "input/output error: {error}"), @@ -48,20 +39,6 @@ impl Display for Error { } } -#[cfg(target_os = "linux")] -impl From for Error { - fn from(error: SeccompError) -> Self { - Self::Seccomp(error) - } -} - -#[cfg(target_os = "linux")] -impl From for Error { - fn from(error: BackendError) -> Self { - Self::Seccomp(SeccompError::Backend(error)) - } -} - #[cfg(target_os = "macos")] impl From for Error { fn from(error: InvalidPathError) -> Self { diff --git a/src/linux/mod.rs b/src/linux/mod.rs index be0076c..b6692c0 100644 --- a/src/linux/mod.rs +++ b/src/linux/mod.rs @@ -6,11 +6,9 @@ use std::path::PathBuf; use crate::error::Result; use crate::linux::namespaces::MountFlags; -use crate::linux::seccomp::NetworkFilter; use crate::{Exception, Sandbox}; mod namespaces; -mod seccomp; /// Linux sandboxing. #[derive(Default)] @@ -70,11 +68,6 @@ impl Sandbox for LinuxSandbox { // Setup namespaces. namespaces::create_namespaces(self.allow_networking, self.bind_mounts)?; - // Setup seccomp network filter. - if !self.allow_networking { - let _ = NetworkFilter::apply(); - } - // Block suid/sgid. // // This is also blocked by our bind mount's MS_NOSUID flag, so we're just diff --git a/src/linux/seccomp.rs b/src/linux/seccomp.rs deleted file mode 100644 index ee811f3..0000000 --- a/src/linux/seccomp.rs +++ /dev/null @@ -1,526 +0,0 @@ -//! Seccomp system call filtering. 
- -use std::collections::BTreeMap; - -use seccompiler::{ - BpfProgram, SeccompAction, SeccompCmpArgLen, SeccompCmpOp, SeccompCondition, SeccompFilter, - SeccompRule, TargetArch, -}; - -use crate::Result; - -#[cfg(target_arch = "x86_64")] -const ARCH: TargetArch = TargetArch::x86_64; -#[cfg(target_arch = "aarch64")] -const ARCH: TargetArch = TargetArch::aarch64; - -/// Seccomp network filter. -#[derive(Default)] -pub struct NetworkFilter; - -impl NetworkFilter { - /// Apply all rules in this filter. - pub fn apply() -> Result<()> { - let mut rules = BTreeMap::new(); - - // Add unconditionally allowed syscalls. - for syscall in SYSCALL_WHITELIST { - rules.insert(*syscall, Vec::new()); - } - - // Add socket syscalls which do not perform network operations. - Self::add_local_socket_whitelist(&mut rules)?; - - let filter = SeccompFilter::new( - rules, - // Action performed if no rules match. - SeccompAction::Errno(libc::EACCES as u32), - // Action performed if any rule matches. - SeccompAction::Allow, - ARCH, - )?; - let program: BpfProgram = filter.try_into()?; - seccompiler::apply_filter(&program)?; - - Ok(()) - } - - /// Allow local filesystem sockets. - fn add_local_socket_whitelist( - rules: &mut BTreeMap>, - ) -> Result<()> { - // Allow local AF_UNIX/AF_LOCAL sockets. - let allow_unix = SeccompCondition::new( - 0, - SeccompCmpArgLen::Dword, - SeccompCmpOp::Eq, - libc::AF_UNIX as u64, - )?; - let unix_rule = SeccompRule::new(vec![allow_unix])?; - - // Allow local IPC AF_NETLINK sockets. - let allow_netlink = SeccompCondition::new( - 0, - SeccompCmpArgLen::Dword, - SeccompCmpOp::Eq, - libc::AF_NETLINK as u64, - )?; - let netlink_rule = SeccompRule::new(vec![allow_netlink])?; - - let socket_rule = vec![unix_rule, netlink_rule]; - - // Restrict socket creation to allowed socket domain types. - rules.insert(libc::SYS_socketpair, socket_rule.clone()); - rules.insert(libc::SYS_socket, socket_rule); - - Ok(()) - } -} - -/// Unconditionally allowed syscalls for networking. 
-const SYSCALL_WHITELIST: &[libc::c_long] = &[ - libc::SYS_read, - libc::SYS_write, - #[cfg(target_arch = "x86_64")] - libc::SYS_open, - libc::SYS_close, - #[cfg(target_arch = "x86_64")] - libc::SYS_stat, - libc::SYS_fstat, - #[cfg(target_arch = "x86_64")] - libc::SYS_lstat, - #[cfg(target_arch = "x86_64")] - libc::SYS_poll, - libc::SYS_lseek, - libc::SYS_mmap, - libc::SYS_mprotect, - libc::SYS_munmap, - libc::SYS_brk, - libc::SYS_rt_sigaction, - libc::SYS_rt_sigprocmask, - libc::SYS_rt_sigreturn, - libc::SYS_ioctl, - libc::SYS_pread64, - libc::SYS_pwrite64, - libc::SYS_readv, - libc::SYS_writev, - #[cfg(target_arch = "x86_64")] - libc::SYS_access, - #[cfg(target_arch = "x86_64")] - libc::SYS_pipe, - #[cfg(target_arch = "x86_64")] - libc::SYS_select, - libc::SYS_sched_yield, - libc::SYS_mremap, - libc::SYS_msync, - libc::SYS_mincore, - libc::SYS_madvise, - libc::SYS_shmget, - libc::SYS_shmat, - libc::SYS_shmctl, - libc::SYS_dup, - #[cfg(target_arch = "x86_64")] - libc::SYS_dup2, - #[cfg(target_arch = "x86_64")] - libc::SYS_pause, - libc::SYS_nanosleep, - libc::SYS_getitimer, - #[cfg(target_arch = "x86_64")] - libc::SYS_alarm, - libc::SYS_setitimer, - libc::SYS_getpid, - #[cfg(target_arch = "x86_64")] - libc::SYS_sendfile, - libc::SYS_connect, - libc::SYS_accept, - libc::SYS_sendto, - libc::SYS_recvfrom, - libc::SYS_sendmsg, - libc::SYS_recvmsg, - libc::SYS_shutdown, - libc::SYS_bind, - libc::SYS_listen, - libc::SYS_getsockname, - libc::SYS_getpeername, - libc::SYS_setsockopt, - libc::SYS_getsockopt, - libc::SYS_clone, - #[cfg(target_arch = "x86_64")] - libc::SYS_fork, - #[cfg(target_arch = "x86_64")] - libc::SYS_vfork, - libc::SYS_execve, - libc::SYS_exit, - libc::SYS_wait4, - libc::SYS_kill, - libc::SYS_uname, - libc::SYS_semget, - libc::SYS_semop, - libc::SYS_semctl, - libc::SYS_shmdt, - libc::SYS_msgget, - libc::SYS_msgsnd, - libc::SYS_msgrcv, - libc::SYS_msgctl, - libc::SYS_fcntl, - libc::SYS_flock, - libc::SYS_fsync, - libc::SYS_fdatasync, - 
libc::SYS_truncate, - libc::SYS_ftruncate, - #[cfg(target_arch = "x86_64")] - libc::SYS_getdents, - libc::SYS_getcwd, - libc::SYS_chdir, - libc::SYS_fchdir, - #[cfg(target_arch = "x86_64")] - libc::SYS_rename, - #[cfg(target_arch = "x86_64")] - libc::SYS_mkdir, - #[cfg(target_arch = "x86_64")] - libc::SYS_rmdir, - #[cfg(target_arch = "x86_64")] - libc::SYS_creat, - #[cfg(target_arch = "x86_64")] - libc::SYS_link, - #[cfg(target_arch = "x86_64")] - libc::SYS_unlink, - #[cfg(target_arch = "x86_64")] - libc::SYS_symlink, - #[cfg(target_arch = "x86_64")] - libc::SYS_readlink, - #[cfg(target_arch = "x86_64")] - libc::SYS_chmod, - libc::SYS_fchmod, - #[cfg(target_arch = "x86_64")] - libc::SYS_chown, - libc::SYS_fchown, - #[cfg(target_arch = "x86_64")] - libc::SYS_lchown, - libc::SYS_umask, - libc::SYS_gettimeofday, - libc::SYS_getrlimit, - libc::SYS_getrusage, - libc::SYS_sysinfo, - libc::SYS_times, - libc::SYS_ptrace, - libc::SYS_getuid, - libc::SYS_syslog, - libc::SYS_getgid, - libc::SYS_setuid, - libc::SYS_setgid, - libc::SYS_geteuid, - libc::SYS_getegid, - libc::SYS_setpgid, - libc::SYS_getppid, - #[cfg(target_arch = "x86_64")] - libc::SYS_getpgrp, - libc::SYS_setsid, - libc::SYS_setreuid, - libc::SYS_setregid, - libc::SYS_getgroups, - libc::SYS_setgroups, - libc::SYS_setresuid, - libc::SYS_getresuid, - libc::SYS_setresgid, - libc::SYS_getresgid, - libc::SYS_getpgid, - libc::SYS_setfsuid, - libc::SYS_setfsgid, - libc::SYS_getsid, - libc::SYS_capget, - libc::SYS_capset, - libc::SYS_rt_sigpending, - libc::SYS_rt_sigtimedwait, - libc::SYS_rt_sigqueueinfo, - libc::SYS_rt_sigsuspend, - libc::SYS_sigaltstack, - #[cfg(target_arch = "x86_64")] - libc::SYS_utime, - #[cfg(target_arch = "x86_64")] - libc::SYS_mknod, - libc::SYS_personality, - #[cfg(target_arch = "x86_64")] - libc::SYS_ustat, - libc::SYS_statfs, - libc::SYS_fstatfs, - libc::SYS_getpriority, - libc::SYS_setpriority, - libc::SYS_sched_setparam, - libc::SYS_sched_getparam, - libc::SYS_sched_setscheduler, - 
libc::SYS_sched_getscheduler, - libc::SYS_sched_get_priority_max, - libc::SYS_sched_get_priority_min, - libc::SYS_sched_rr_get_interval, - libc::SYS_mlock, - libc::SYS_munlock, - libc::SYS_mlockall, - libc::SYS_munlockall, - libc::SYS_vhangup, - #[cfg(target_arch = "x86_64")] - libc::SYS_modify_ldt, - libc::SYS_pivot_root, - libc::SYS_prctl, - #[cfg(target_arch = "x86_64")] - libc::SYS_arch_prctl, - libc::SYS_adjtimex, - libc::SYS_setrlimit, - libc::SYS_chroot, - libc::SYS_sync, - libc::SYS_acct, - libc::SYS_settimeofday, - libc::SYS_umount2, - libc::SYS_swapon, - libc::SYS_swapoff, - libc::SYS_reboot, - libc::SYS_sethostname, - libc::SYS_setdomainname, - #[cfg(target_arch = "x86_64")] - libc::SYS_get_kernel_syms, - libc::SYS_quotactl, - libc::SYS_gettid, - libc::SYS_readahead, - libc::SYS_setxattr, - libc::SYS_lsetxattr, - libc::SYS_fsetxattr, - libc::SYS_getxattr, - libc::SYS_lgetxattr, - libc::SYS_fgetxattr, - libc::SYS_listxattr, - libc::SYS_llistxattr, - libc::SYS_flistxattr, - libc::SYS_removexattr, - libc::SYS_lremovexattr, - libc::SYS_fremovexattr, - libc::SYS_tkill, - #[cfg(target_arch = "x86_64")] - libc::SYS_time, - libc::SYS_futex, - libc::SYS_sched_setaffinity, - libc::SYS_sched_getaffinity, - #[cfg(target_arch = "x86_64")] - libc::SYS_set_thread_area, - libc::SYS_io_setup, - libc::SYS_io_destroy, - libc::SYS_io_getevents, - libc::SYS_io_submit, - libc::SYS_io_cancel, - #[cfg(target_arch = "x86_64")] - libc::SYS_get_thread_area, - libc::SYS_lookup_dcookie, - #[cfg(target_arch = "x86_64")] - libc::SYS_epoll_create, - #[cfg(target_arch = "x86_64")] - libc::SYS_epoll_ctl_old, - #[cfg(target_arch = "x86_64")] - libc::SYS_epoll_wait_old, - libc::SYS_remap_file_pages, - libc::SYS_getdents64, - libc::SYS_set_tid_address, - libc::SYS_restart_syscall, - libc::SYS_semtimedop, - #[cfg(target_arch = "x86_64")] - libc::SYS_fadvise64, - libc::SYS_timer_create, - libc::SYS_timer_settime, - libc::SYS_timer_gettime, - libc::SYS_timer_getoverrun, - 
libc::SYS_timer_delete, - libc::SYS_clock_settime, - libc::SYS_clock_gettime, - libc::SYS_clock_getres, - libc::SYS_clock_nanosleep, - libc::SYS_exit_group, - #[cfg(target_arch = "x86_64")] - libc::SYS_epoll_wait, - libc::SYS_epoll_ctl, - libc::SYS_tgkill, - #[cfg(target_arch = "x86_64")] - libc::SYS_utimes, - libc::SYS_mbind, - libc::SYS_set_mempolicy, - libc::SYS_get_mempolicy, - libc::SYS_mq_open, - libc::SYS_mq_unlink, - libc::SYS_mq_timedsend, - libc::SYS_mq_timedreceive, - libc::SYS_mq_notify, - libc::SYS_mq_getsetattr, - libc::SYS_waitid, - libc::SYS_add_key, - libc::SYS_request_key, - libc::SYS_keyctl, - libc::SYS_ioprio_set, - libc::SYS_ioprio_get, - #[cfg(target_arch = "x86_64")] - libc::SYS_inotify_init, - libc::SYS_inotify_add_watch, - libc::SYS_inotify_rm_watch, - libc::SYS_migrate_pages, - libc::SYS_openat, - libc::SYS_mkdirat, - libc::SYS_mknodat, - libc::SYS_fchownat, - #[cfg(target_arch = "x86_64")] - libc::SYS_futimesat, - libc::SYS_newfstatat, - libc::SYS_unlinkat, - libc::SYS_renameat, - libc::SYS_linkat, - libc::SYS_symlinkat, - libc::SYS_readlinkat, - libc::SYS_fchmodat, - libc::SYS_faccessat, - libc::SYS_pselect6, - libc::SYS_ppoll, - libc::SYS_unshare, - libc::SYS_set_robust_list, - libc::SYS_get_robust_list, - libc::SYS_splice, - libc::SYS_tee, - libc::SYS_sync_file_range, - libc::SYS_vmsplice, - libc::SYS_move_pages, - libc::SYS_utimensat, - libc::SYS_epoll_pwait, - #[cfg(target_arch = "x86_64")] - libc::SYS_signalfd, - libc::SYS_timerfd_create, - #[cfg(target_arch = "x86_64")] - libc::SYS_eventfd, - libc::SYS_fallocate, - libc::SYS_timerfd_settime, - libc::SYS_timerfd_gettime, - libc::SYS_accept4, - libc::SYS_signalfd4, - libc::SYS_eventfd2, - libc::SYS_epoll_create1, - libc::SYS_dup3, - libc::SYS_pipe2, - libc::SYS_inotify_init1, - libc::SYS_preadv, - libc::SYS_pwritev, - libc::SYS_rt_tgsigqueueinfo, - libc::SYS_perf_event_open, - libc::SYS_recvmmsg, - libc::SYS_fanotify_init, - libc::SYS_fanotify_mark, - libc::SYS_prlimit64, - 
libc::SYS_name_to_handle_at, - libc::SYS_open_by_handle_at, - libc::SYS_clock_adjtime, - libc::SYS_syncfs, - libc::SYS_sendmmsg, - libc::SYS_setns, - libc::SYS_getcpu, - libc::SYS_process_vm_readv, - libc::SYS_process_vm_writev, - libc::SYS_kcmp, - libc::SYS_sched_setattr, - libc::SYS_sched_getattr, - libc::SYS_renameat2, - libc::SYS_seccomp, - libc::SYS_getrandom, - libc::SYS_memfd_create, - libc::SYS_bpf, - libc::SYS_execveat, - libc::SYS_userfaultfd, - libc::SYS_membarrier, - libc::SYS_mlock2, - libc::SYS_copy_file_range, - libc::SYS_preadv2, - libc::SYS_pwritev2, - libc::SYS_pkey_mprotect, - libc::SYS_pkey_alloc, - libc::SYS_pkey_free, - libc::SYS_statx, - libc::SYS_rseq, - libc::SYS_pidfd_send_signal, - libc::SYS_open_tree, - libc::SYS_fsopen, - libc::SYS_fsconfig, - libc::SYS_fspick, - libc::SYS_pidfd_open, - libc::SYS_clone3, - libc::SYS_close_range, - libc::SYS_openat2, - libc::SYS_faccessat2, - libc::SYS_process_madvise, - libc::SYS_epoll_pwait2, - libc::SYS_mount_setattr, - libc::SYS_quotactl_fd, - libc::SYS_landlock_create_ruleset, - libc::SYS_landlock_add_rule, - libc::SYS_landlock_restrict_self, - libc::SYS_memfd_secret, - libc::SYS_process_mrelease, - libc::SYS_futex_waitv, - libc::SYS_set_mempolicy_home_node, -]; - -#[cfg(test)] -mod tests { - use std::io::{Error as IoError, ErrorKind as IoErrorKind}; - - use super::*; - - #[test] - fn block_io_uring() { - NetworkFilter::apply().unwrap(); - - let mut io_uring_params = - vec![IoUringParams { flags: 1, sq_entries: 32, cq_entries: 32, ..Default::default() }]; - - let result = unsafe { - libc::syscall( - libc::SYS_io_uring_setup, - io_uring_params.len(), - io_uring_params.as_mut_ptr(), - ) - }; - - assert_eq!(result, -1); - assert_eq!(IoError::last_os_error().kind(), IoErrorKind::PermissionDenied); - } - - #[test] - fn allow_local_sockets() { - NetworkFilter::apply().unwrap(); - - let fd = unsafe { libc::socket(libc::AF_UNIX, libc::SOCK_STREAM, 0) }; - if fd < 0 { - panic!("AF_UNIX socket creation 
failed: {}", IoError::last_os_error()); - } - - unsafe { libc::close(fd) }; - } - - #[repr(C)] - #[derive(Default)] - struct IoUringParams { - sq_entries: u32, - cq_entries: u32, - flags: u32, - sq_thread_cpu: u32, - sq_thread_idle: u32, - features: u32, - wq_fd: u32, - resv: [u32; 3], - sq_off: IoSqringOffsets, - cq_off: IoSqringOffsets, - } - - #[repr(C)] - #[derive(Default)] - struct IoSqringOffsets { - head: u32, - tail: u32, - ring_mask: u32, - ring_entries: u32, - flags: u32, - dropped: u32, - array: u32, - resv: [u32; 3], - } -} diff --git a/tests/net_without_namespaces.rs b/tests/net_without_namespaces.rs deleted file mode 100644 index 85f435c..0000000 --- a/tests/net_without_namespaces.rs +++ /dev/null @@ -1,47 +0,0 @@ -#[cfg(target_os = "linux")] -use std::collections::BTreeMap; -#[cfg(target_os = "linux")] -use std::net::TcpStream; - -#[cfg(target_os = "linux")] -use birdcage::{Birdcage, Sandbox}; -#[cfg(target_os = "linux")] -use seccompiler::{BpfProgram, SeccompAction, SeccompFilter, TargetArch}; - -#[cfg(target_os = "linux")] -#[cfg(target_arch = "x86_64")] -const ARCH: TargetArch = TargetArch::x86_64; -#[cfg(target_os = "linux")] -#[cfg(target_arch = "aarch64")] -const ARCH: TargetArch = TargetArch::aarch64; - -#[cfg(target_os = "linux")] -fn main() { - // Create seccomp filter blocking `unshare` syscall. - let mut rules = BTreeMap::new(); - rules.insert(libc::SYS_unshare, Vec::new()); - let filter = SeccompFilter::new( - rules, - SeccompAction::Allow, - SeccompAction::Errno(libc::EACCES as u32), - ARCH, - ) - .unwrap(); - let program: BpfProgram = filter.try_into().unwrap(); - seccompiler::apply_filter(&program).unwrap(); - - let birdcage = Birdcage::new(); - let result = birdcage.lock(); - - // Seccomp isn't supported, so failure is desired. - if result.is_err() { - return; - } - - // Seccomp is supported, so networking should still be blocked. 
- let result = TcpStream::connect("8.8.8.8:443"); - assert!(result.is_err()); -} - -#[cfg(not(target_os = "linux"))] -fn main() {} diff --git a/tests/net_without_seccomp.rs b/tests/net_without_seccomp.rs deleted file mode 100644 index 14407f3..0000000 --- a/tests/net_without_seccomp.rs +++ /dev/null @@ -1,53 +0,0 @@ -#[cfg(target_os = "linux")] -use std::collections::BTreeMap; -#[cfg(target_os = "linux")] -use std::net::TcpStream; - -#[cfg(target_os = "linux")] -use birdcage::{Birdcage, Sandbox}; -#[cfg(target_os = "linux")] -use seccompiler::{ - BpfProgram, SeccompAction, SeccompCmpArgLen, SeccompCmpOp, SeccompCondition, SeccompFilter, - SeccompRule, TargetArch, -}; - -#[cfg(target_os = "linux")] -#[cfg(target_arch = "x86_64")] -const ARCH: TargetArch = TargetArch::x86_64; -#[cfg(target_os = "linux")] -#[cfg(target_arch = "aarch64")] -const ARCH: TargetArch = TargetArch::aarch64; - -#[cfg(target_os = "linux")] -fn main() { - // Create seccomp filter blocking seccomp prctl syscall. - let mut rules = BTreeMap::new(); - let seccomp_prctl = SeccompCondition::new( - 0, - SeccompCmpArgLen::Dword, - SeccompCmpOp::Eq, - libc::PR_SET_SECCOMP as u64, - ) - .unwrap(); - let rule = SeccompRule::new(vec![seccomp_prctl]).unwrap(); - rules.insert(libc::SYS_prctl, vec![rule]); - rules.insert(libc::SYS_seccomp, Vec::new()); - let filter = SeccompFilter::new( - rules, - SeccompAction::Allow, - SeccompAction::Errno(libc::EACCES as u32), - ARCH, - ) - .unwrap(); - let program: BpfProgram = filter.try_into().unwrap(); - seccompiler::apply_filter(&program).unwrap(); - - let birdcage = Birdcage::new(); - birdcage.lock().unwrap(); - - let result = TcpStream::connect("8.8.8.8:443"); - assert!(result.is_err()); -} - -#[cfg(not(target_os = "linux"))] -fn main() {}