Skip to content

Commit

Permalink
Add namespace support to Linux sandbox (#43)
Browse files Browse the repository at this point in the history
The previous Linux sandbox allowed access to abstract namespace
sockets and handled network sandboxing through fragile seccomp rules.

To both simplify the code and improve our sandboxing, this patch
introduces the usage of Linux namespaces to both clear the abstract
namespace and create a network namespace.

It is now recommended to lock down the sandbox in a newly created
process, since user namespaces cannot be created in multi-threaded
applications.

While this patch technically removes the need for our seccomp
network filter, it is still kept in place for redundancy and as an
alternative when namespace creation is blocked (e.g. inside Docker).
  • Loading branch information
cd-work authored Sep 14, 2023
1 parent abc04fe commit b940485
Show file tree
Hide file tree
Showing 14 changed files with 345 additions and 119 deletions.
48 changes: 48 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,51 @@ rust-version = "1.63.0"
license = "GPL-3.0-or-later"
edition = "2021"

[[test]]
name = "canonicalize"
path = "tests/canonicalize.rs"
harness = false

[[test]]
name = "env"
path = "tests/env.rs"
harness = false

[[test]]
name = "exec"
path = "tests/exec.rs"
harness = false

[[test]]
name = "fs"
path = "tests/fs.rs"
harness = false

[[test]]
name = "full_env"
path = "tests/full_env.rs"
harness = false

[[test]]
name = "full_sandbox"
path = "tests/full_sandbox.rs"
harness = false

[[test]]
name = "net"
path = "tests/net.rs"
harness = false

[[test]]
name = "net_without_seccomp"
path = "tests/net_without_seccomp.rs"
harness = false

[[test]]
name = "net_without_namespaces"
path = "tests/net_without_namespaces.rs"
harness = false

[target.'cfg(target_os = "linux")'.dependencies]
seccompiler = "0.2.0"
landlock = "0.2.0"
Expand All @@ -17,3 +62,6 @@ libc = "0.2.132"
[dev-dependencies]
clap = { version = "3.2.17", features = ["derive"] }
tempfile = "3.3.0"

[dependencies]
bitflags = "2.4.0"
5 changes: 0 additions & 5 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ use std::error::Error as StdError;
#[cfg(target_os = "macos")]
use std::ffi::OsString;
use std::fmt::{self, Display, Formatter};
#[cfg(target_os = "macos")]
use std::io::Error as IoError;
use std::result::Result as StdResult;

#[cfg(target_os = "linux")]
use landlock::{PathFdError, RulesetError};
#[cfg(target_os = "linux")]
#[cfg(target_os = "linux")]
use seccompiler::{BackendError, Error as SeccompError};

/// Birdcage result type.
Expand All @@ -35,7 +33,6 @@ pub enum Error {
InvalidPath(InvalidPathError),

/// I/O error.
#[cfg(target_os = "macos")]
Io(IoError),

/// Sandbox activation failed.
Expand All @@ -55,7 +52,6 @@ impl Display for Error {
Self::InvalidPath(error) => write!(f, "invalid path: {error}"),
#[cfg(target_os = "macos")]
Self::InvalidPath(error) => write!(f, "invalid path: {error:?}"),
#[cfg(target_os = "macos")]
Self::Io(error) => write!(f, "input/output error: {error}"),
Self::ActivationFailed(error) => {
write!(f, "failed to initialize a sufficient sandbox: {error}")
Expand Down Expand Up @@ -99,7 +95,6 @@ impl From<InvalidPathError> for Error {
}
}

#[cfg(target_os = "macos")]
impl From<IoError> for Error {
fn from(error: IoError) -> Self {
Self::Io(error)
Expand Down
6 changes: 5 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,11 @@ pub trait Sandbox: Sized {
/// prohibit access to this resource without creating a new sandbox.
fn add_exception(&mut self, exception: Exception) -> Result<&mut Self>;

/// Apply the sandbox restrictions to the current thread.
/// Apply the sandbox restrictions to the current process.
///
/// # Errors
///
/// Sandboxing will fail if the calling process is not single-threaded.
fn lock(self) -> Result<()>;
}

Expand Down
111 changes: 107 additions & 4 deletions src/linux/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
//! Linux sandboxing.
//!
//! This module implements sandboxing on Linux based on the Landlock LSM,
//! combined with seccomp for anything other than the filesystem.
//! namespaces, and seccomp.
use std::fs;
use std::io::Error as IoError;

use bitflags::bitflags;
use landlock::{
make_bitflags, Access, AccessFs, Compatible, PathBeneath, PathFd, Ruleset, RulesetAttr,
RulesetCreated, RulesetCreatedAttr, RulesetStatus, ABI as LANDLOCK_ABI,
Expand All @@ -17,7 +21,7 @@ mod seccomp;
/// Minimum landlock ABI version.
const ABI: LANDLOCK_ABI = LANDLOCK_ABI::V1;

/// Linux sandboxing based on Landlock and Seccomp.
/// Linux sandboxing.
pub struct LinuxSandbox {
env_exceptions: Vec<String>,
landlock: RulesetCreated,
Expand Down Expand Up @@ -69,9 +73,12 @@ impl Sandbox for LinuxSandbox {
crate::restrict_env_variables(&self.env_exceptions);
}

// Create and apply seccomp filter.
// Clear abstract namespace by entering a new user namespace.
let _ = create_user_namespace(false);

// Create network namespace.
if !self.allow_networking {
NetworkFilter::apply()?;
restrict_networking()?;
}

// Apply landlock rules.
Expand All @@ -85,3 +92,99 @@ impl Sandbox for LinuxSandbox {
}
}
}

/// Cut off network access via a network namespace, backed up by seccomp.
///
/// Both mechanisms are always attempted. The call succeeds as soon as the
/// namespace setup succeeded; if that failed, the outcome of the seccomp
/// filter decides the result.
fn restrict_networking() -> Result<()> {
    // Become root in a fresh user namespace, then move into a new network
    // namespace.
    let namespace_outcome = match create_user_namespace(true) {
        Ok(()) => unshare(Namespaces::NETWORK),
        Err(err) => Err(err),
    };

    // The seccomp filter is installed regardless of the namespace outcome.
    let filter_outcome = NetworkFilter::apply();

    // A working namespace is sufficient on its own; otherwise fall back to
    // whatever the seccomp filter reported.
    match namespace_outcome {
        Ok(()) => Ok(()),
        Err(_) => filter_outcome,
    }
}

/// Move the calling process into a fresh user namespace.
///
/// With `become_root` set, the caller's effective UID and GID show up as 0
/// inside the new namespace. Without it, both keep the numeric value they have
/// in the parent namespace.
///
/// # Errors
///
/// Fails if the `unshare` call is rejected or if any of the ID mapping files
/// under `/proc/self` cannot be written.
fn create_user_namespace(become_root: bool) -> Result<()> {
    // Read the effective IDs up front, before leaving the current namespace.
    let outer_uid = unsafe { libc::geteuid() };
    let outer_gid = unsafe { libc::getegid() };

    unshare(Namespaces::USER)?;

    // IDs as seen from inside the new namespace.
    let (inner_uid, inner_gid) = if become_root { (0, 0) } else { (outer_uid, outer_gid) };

    // Each map holds a single `<inner> <outer> <count>` entry. `setgroups` is
    // disabled before the GID map is written (see user_namespaces(7)).
    fs::write("/proc/self/uid_map", format!("{inner_uid} {outer_uid} 1\n"))?;
    fs::write("/proc/self/setgroups", b"deny")?;
    fs::write("/proc/self/gid_map", format!("{inner_gid} {outer_gid} 1\n"))?;

    Ok(())
}

/// Call `unshare(2)` with the given namespace flags.
///
/// # Errors
///
/// Returns the last OS error when the system call reports a non-zero status.
fn unshare(namespaces: Namespaces) -> Result<()> {
    match unsafe { libc::unshare(namespaces.bits()) } {
        0 => Ok(()),
        _ => Err(IoError::last_os_error().into()),
    }
}

bitflags! {
/// Unshare system call namespace flags.
///
/// Each constant wraps one `CLONE_*` flag; the combined bits are handed to
/// `libc::unshare` by the `unshare` helper in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct Namespaces: libc::c_int {
/// Unshare the file descriptor table, so that the calling process no longer
/// shares its file descriptors with any other process.
const FILES = libc::CLONE_FILES;
/// Unshare filesystem attributes, so that the calling process no longer shares
/// its root directory, current directory, or umask attributes with any other process.
const FS = libc::CLONE_FS;
/// Unshare the cgroup namespace.
const CGROUP = libc::CLONE_NEWCGROUP;
/// Unshare the IPC namespace, so that the calling process has a private copy of
/// the IPC namespace which is not shared with any other process. Specifying
/// this flag automatically implies [`Namespaces::SYSVSEM`] as well.
const IPC = libc::CLONE_NEWIPC;
/// Unshare the network namespace, so that the calling process is moved into a
/// new network namespace which is not shared with any previously existing process.
const NETWORK = libc::CLONE_NEWNET;
/// Unshare the mount namespace, so that the calling process has a private copy
/// of its namespace which is not shared with any other process. Specifying this
/// flag automatically implies [`Namespaces::FS`] as well.
const MOUNT = libc::CLONE_NEWNS;
/// Unshare the PID namespace, so that the calling process has a new PID
/// namespace for its children which is not shared with any previously existing
/// process. The calling process is **not** moved into the new namespace. The
/// first child created by the calling process will have the process ID 1 and
/// will assume the role of init in the new namespace. Specifying this flag
/// automatically implies [`libc::CLONE_THREAD`] as well.
const PID = libc::CLONE_NEWPID;
/// Unshare the time namespace, so that the calling process has a new time
/// namespace for its children which is not shared with any previously existing
/// process. The calling process is **not** moved into the new namespace.
// NOTE(review): 0x80 is the kernel's `CLONE_NEWTIME` value; presumably it is
// hard-coded because the pinned `libc` version does not export the constant.
// Confirm before swapping it for `libc::CLONE_NEWTIME`.
const TIME = 0x80;
/// Unshare the user namespace, so that the calling process is moved into a new
/// user namespace which is not shared with any previously existing process. The
/// caller obtains a full set of capabilities in the new namespace.
///
/// Requires that the calling process is not threaded; specifying this flag
/// automatically implies [`libc::CLONE_THREAD`] and [`Namespaces::FS`] as well.
const USER = libc::CLONE_NEWUSER;
/// Unshare the UTS namespace, so that the calling process has a private
/// copy of the UTS namespace which is not shared with any other process.
const UTS = libc::CLONE_NEWUTS;
/// Unshare System V semaphore adjustment (semadj) values, so that the calling
/// process has a new empty semadj list that is not shared with any other
/// process. If this is the last process that has a reference to the process's
/// current semadj list, then the adjustments in that list are applied to the
/// corresponding semaphores.
const SYSVSEM = libc::CLONE_SYSVSEM;
}
}
66 changes: 66 additions & 0 deletions src/linux/seccomp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,3 +458,69 @@ const SYSCALL_WHITELIST: &[libc::c_long] = &[
libc::SYS_futex_waitv,
libc::SYS_set_mempolicy_home_node,
];

#[cfg(test)]
mod tests {
    use std::io::{Error as IoError, ErrorKind as IoErrorKind};

    use super::*;

    /// With the filter active, `io_uring_setup` must fail with a permission error.
    #[test]
    fn block_io_uring() {
        NetworkFilter::apply().unwrap();

        let mut params = vec![IoUringParams {
            sq_entries: 32,
            cq_entries: 32,
            flags: 1,
            ..Default::default()
        }];

        let entries = params.len();
        let params_ptr = params.as_mut_ptr();
        let status = unsafe { libc::syscall(libc::SYS_io_uring_setup, entries, params_ptr) };

        assert_eq!(status, -1);
        let failure_kind = IoError::last_os_error().kind();
        assert_eq!(failure_kind, IoErrorKind::PermissionDenied);
    }

    /// With the filter active, creating an `AF_UNIX` stream socket must still work.
    #[test]
    fn allow_local_sockets() {
        NetworkFilter::apply().unwrap();

        let socket_fd = unsafe { libc::socket(libc::AF_UNIX, libc::SOCK_STREAM, 0) };
        assert!(socket_fd >= 0, "AF_UNIX socket creation failed: {}", IoError::last_os_error());

        unsafe { libc::close(socket_fd) };
    }

    /// Parameter block handed to the `io_uring_setup` system call in the test above.
    ///
    /// NOTE(review): presumably mirrors the kernel's `struct io_uring_params`;
    /// the `#[repr(C)]` field order must not be changed.
    #[repr(C)]
    #[derive(Default)]
    struct IoUringParams {
        sq_entries: u32,
        cq_entries: u32,
        flags: u32,
        sq_thread_cpu: u32,
        sq_thread_idle: u32,
        features: u32,
        wq_fd: u32,
        resv: [u32; 3],
        sq_off: IoSqringOffsets,
        cq_off: IoSqringOffsets,
    }

    /// Ring offset block embedded twice in [`IoUringParams`].
    #[repr(C)]
    #[derive(Default)]
    struct IoSqringOffsets {
        head: u32,
        tail: u32,
        ring_mask: u32,
        ring_entries: u32,
        flags: u32,
        dropped: u32,
        array: u32,
        resv: [u32; 3],
    }
}
3 changes: 1 addition & 2 deletions tests/canonicalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ use std::fs;

use birdcage::{Birdcage, Exception, Sandbox};

#[test]
fn canonicalize() {
fn main() {
let mut birdcage = Birdcage::new().unwrap();
birdcage.add_exception(Exception::Read("./".into())).unwrap();
birdcage.lock().unwrap();
Expand Down
3 changes: 1 addition & 2 deletions tests/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ use std::env;

use birdcage::{Birdcage, Exception, Sandbox};

#[test]
fn partial_env() {
fn main() {
// Setup our environment variables
env::set_var("PUBLIC", "GOOD");
env::set_var("PRIVATE", "BAD");
Expand Down
3 changes: 1 addition & 2 deletions tests/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ use std::process::Command;

use birdcage::{Birdcage, Exception, Sandbox};

#[test]
fn execution() {
fn main() {
let mut birdcage = Birdcage::new().unwrap();
birdcage.add_exception(Exception::ExecuteAndRead("/usr/bin/true".into())).unwrap();
birdcage.add_exception(Exception::ExecuteAndRead("/usr/lib".into())).unwrap();
Expand Down
3 changes: 1 addition & 2 deletions tests/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ use std::fs;
use birdcage::{Birdcage, Exception, Sandbox};
use tempfile::NamedTempFile;

#[test]
fn partial_fs() {
fn main() {
const FILE_CONTENT: &str = "expected content";

// Setup our test files.
Expand Down
3 changes: 1 addition & 2 deletions tests/full_env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ use std::env;

use birdcage::{Birdcage, Exception, Sandbox};

#[test]
fn full_env() {
fn main() {
// Setup our environment variables
env::set_var("PUBLIC", "GOOD");

Expand Down
Loading

0 comments on commit b940485

Please sign in to comment.