Skip to content

Commit

Permalink
Add namespace support to Linux sandbox
Browse files Browse the repository at this point in the history
The previous Linux sandbox allowed access to abstract namespace
sockets and handled network sandboxing through fragile seccomp rules.

To both simplify the code and improve our sandboxing, this patch
introduces the usage of Linux namespaces to both clear the abstract
namespace and create a network namespace.

It is now necessary to always lock down the sandbox in a newly created
process, since user namespaces cannot be created from multi-threaded
applications and require writing to `/proc/self/*id_map` exactly once
(every following write from the same process is a permission error).

This patch completely removes all seccomp code; however, some parts of it
might still be useful for future sandboxing improvements.
  • Loading branch information
cd-work committed Sep 13, 2023
1 parent abc04fe commit 6f82216
Show file tree
Hide file tree
Showing 12 changed files with 152 additions and 605 deletions.
39 changes: 38 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,48 @@ rust-version = "1.63.0"
license = "GPL-3.0-or-later"
edition = "2021"

[[test]]
name = "canonicalize"
path = "tests/canonicalize.rs"
harness = false

[[test]]
name = "env"
path = "tests/env.rs"
harness = false

[[test]]
name = "exec"
path = "tests/exec.rs"
harness = false

[[test]]
name = "fs"
path = "tests/fs.rs"
harness = false

[[test]]
name = "full_env"
path = "tests/full_env.rs"
harness = false

[[test]]
name = "full_sandbox"
path = "tests/full_sandbox.rs"
harness = false

[[test]]
name = "net"
path = "tests/net.rs"
harness = false

[target.'cfg(target_os = "linux")'.dependencies]
seccompiler = "0.2.0"
landlock = "0.2.0"
libc = "0.2.132"

[dev-dependencies]
clap = { version = "3.2.17", features = ["derive"] }
tempfile = "3.3.0"

[dependencies]
bitflags = "2.4.0"
27 changes: 0 additions & 27 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,11 @@ use std::error::Error as StdError;
#[cfg(target_os = "macos")]
use std::ffi::OsString;
use std::fmt::{self, Display, Formatter};
#[cfg(target_os = "macos")]
use std::io::Error as IoError;
use std::result::Result as StdResult;

#[cfg(target_os = "linux")]
use landlock::{PathFdError, RulesetError};
#[cfg(target_os = "linux")]
#[cfg(target_os = "linux")]
use seccompiler::{BackendError, Error as SeccompError};

/// Birdcage result type.
pub type Result<T> = StdResult<T, Error>;
Expand All @@ -24,18 +20,13 @@ pub enum Error {
#[cfg(target_os = "linux")]
Ruleset(RulesetError),

/// Seccomp errors.
#[cfg(target_os = "linux")]
Seccomp(SeccompError),

/// Invalid sandbox exception path.
#[cfg(target_os = "linux")]
InvalidPath(PathFdError),
#[cfg(target_os = "macos")]
InvalidPath(InvalidPathError),

/// I/O error.
#[cfg(target_os = "macos")]
Io(IoError),

/// Sandbox activation failed.
Expand All @@ -50,12 +41,9 @@ impl Display for Error {
#[cfg(target_os = "linux")]
Self::Ruleset(error) => write!(f, "landlock ruleset error: {error}"),
#[cfg(target_os = "linux")]
Self::Seccomp(error) => write!(f, "seccomp error: {error}"),
#[cfg(target_os = "linux")]
Self::InvalidPath(error) => write!(f, "invalid path: {error}"),
#[cfg(target_os = "macos")]
Self::InvalidPath(error) => write!(f, "invalid path: {error:?}"),
#[cfg(target_os = "macos")]
Self::Io(error) => write!(f, "input/output error: {error}"),
Self::ActivationFailed(error) => {
write!(f, "failed to initialize a sufficient sandbox: {error}")
Expand All @@ -71,20 +59,6 @@ impl From<RulesetError> for Error {
}
}

#[cfg(target_os = "linux")]
impl From<SeccompError> for Error {
fn from(error: SeccompError) -> Self {
Self::Seccomp(error)
}
}

#[cfg(target_os = "linux")]
impl From<BackendError> for Error {
fn from(error: BackendError) -> Self {
Self::Seccomp(SeccompError::Backend(error))
}
}

#[cfg(target_os = "linux")]
impl From<PathFdError> for Error {
fn from(error: PathFdError) -> Self {
Expand All @@ -99,7 +73,6 @@ impl From<InvalidPathError> for Error {
}
}

#[cfg(target_os = "macos")]
impl From<IoError> for Error {
fn from(error: IoError) -> Self {
Self::Io(error)
Expand Down
8 changes: 7 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,13 @@ pub trait Sandbox: Sized {
/// prohibit access to this resource without creating a new sandbox.
fn add_exception(&mut self, exception: Exception) -> Result<&mut Self>;

/// Apply the sandbox restrictions to the current thread.
/// Apply the sandbox restrictions to the current process.
///
/// # Errors
///
/// Sandboxing will fail if the calling process is not single-threaded, or
/// has previously been sandboxed. It is recommended to spawn a new process
/// before sandboxing to avoid these issues.
fn lock(self) -> Result<()>;
}

Expand Down
103 changes: 96 additions & 7 deletions src/linux/mod.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
//! Linux sandboxing.
//!
//! This module implements sandboxing on Linux based on the Landlock LSM,
//! combined with seccomp for anything other than the filesystem.
//! combined with namespaces for network filtering.
use std::fs;
use std::io::Error as IoError;

use bitflags::bitflags;
use landlock::{
make_bitflags, Access, AccessFs, Compatible, PathBeneath, PathFd, Ruleset, RulesetAttr,
RulesetCreated, RulesetCreatedAttr, RulesetStatus, ABI as LANDLOCK_ABI,
};

use crate::error::{Error, Result};
use crate::linux::seccomp::NetworkFilter;
use crate::{Exception, Sandbox};

mod seccomp;

/// Minimum landlock ABI version.
const ABI: LANDLOCK_ABI = LANDLOCK_ABI::V1;

/// Linux sandboxing based on Landlock and Seccomp.
/// Linux sandboxing.
pub struct LinuxSandbox {
env_exceptions: Vec<String>,
landlock: RulesetCreated,
Expand Down Expand Up @@ -69,9 +70,13 @@ impl Sandbox for LinuxSandbox {
crate::restrict_env_variables(&self.env_exceptions);
}

// Create and apply seccomp filter.
// Enter a user namespace to unbind abstract namespace sockets.
create_user_namespace(false)?;

// Create network namespace.
if !self.allow_networking {
NetworkFilter::apply()?;
create_user_namespace(true)?;
unshare(Namespaces::NETWORK)?;
}

// Apply landlock rules.
Expand All @@ -85,3 +90,87 @@ impl Sandbox for LinuxSandbox {
}
}
}

/// Create a new user namespace and map the current user and group into it.
///
/// If the `become_root` flag is set, then the current user and group will be
/// mapped to UID 0 and GID 0 inside the namespace. Otherwise they keep the
/// UID and GID they have in the parent namespace.
///
/// # Errors
///
/// Returns an error if the `unshare` call fails or if any of the
/// `/proc/self/uid_map`, `/proc/self/setgroups` or `/proc/self/gid_map`
/// writes is rejected.
pub(crate) fn create_user_namespace(become_root: bool) -> Result<()> {
    // The IDs must be read before unsharing, while they still refer to the
    // parent namespace.
    //
    // SAFETY: `getuid` and `getgid` take no arguments and access no memory.
    let uid = unsafe { libc::getuid() };
    let gid = unsafe { libc::getgid() };

    // Create the namespace.
    unshare(Namespaces::USER)?;

    // The UID and GID need separate maps: the parent-side ID of each line must
    // be the ID the process actually holds, and the GID is not necessarily
    // numerically equal to the UID.
    let (inner_uid, inner_gid) = if become_root { (0, 0) } else { (uid, gid) };
    let uid_map = format!("{inner_uid} {uid} 1\n");
    let gid_map = format!("{inner_gid} {gid} 1\n");

    fs::write("/proc/self/uid_map", uid_map.as_bytes())?;
    // `setgroups` is set to "deny" before the GID map is written.
    fs::write("/proc/self/setgroups", b"deny")?;
    fs::write("/proc/self/gid_map", gid_map.as_bytes())?;

    Ok(())
}

/// Invoke the `unshare` system call with the given set of flags.
///
/// # Errors
///
/// When the call returns a non-zero value, the last OS error is captured and
/// converted into the crate's error type.
pub(crate) fn unshare(namespaces: Namespaces) -> Result<()> {
    // SAFETY: `unshare` only receives an integer flag set and dereferences no
    // pointers.
    match unsafe { libc::unshare(namespaces.bits()) } {
        0 => Ok(()),
        _ => Err(IoError::last_os_error().into()),
    }
}

bitflags! {
/// Unshare system call namespace flags.
///
/// Each constant wraps one `CLONE_*` flag; values can be combined and are
/// passed to the `unshare` system call through their raw bits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct Namespaces: libc::c_int {
/// Unshare the file descriptor table, so that the calling process no longer
/// shares its file descriptors with any other process.
const FILES = libc::CLONE_FILES;
/// Unshare filesystem attributes, so that the calling process no longer shares
/// its root directory, current directory, or umask attributes with any other process.
const FS = libc::CLONE_FS;
/// Unshare the cgroup namespace.
const CGROUP = libc::CLONE_NEWCGROUP;
/// Unshare the IPC namespace, so that the calling process has a private copy of
/// the IPC namespace which is not shared with any other process. Specifying
/// this flag automatically implies [`Namespaces::SYSVSEM`] as well.
const IPC = libc::CLONE_NEWIPC;
/// Unshare the network namespace, so that the calling process is moved into a
/// new network namespace which is not shared with any previously existing process.
const NETWORK = libc::CLONE_NEWNET;
/// Unshare the mount namespace, so that the calling process has a private copy
/// of its namespace which is not shared with any other process. Specifying this
/// flag automatically implies [`Namespaces::FS`] as well.
const MOUNT = libc::CLONE_NEWNS;
/// Unshare the PID namespace, so that the calling process has a new PID
/// namespace for its children which is not shared with any previously existing
/// process. The calling process is **not** moved into the new namespace. The
/// first child created by the calling process will have the process ID 1 and
/// will assume the role of init in the new namespace. Specifying this flag
/// automatically implies [`libc::CLONE_THREAD`] as well.
const PID = libc::CLONE_NEWPID;
/// Unshare the time namespace, so that the calling process has a new time
/// namespace for its children which is not shared with any previously existing
/// process. The calling process is **not** moved into the new namespace.
///
/// NOTE(review): `0x80` is the raw value of `CLONE_NEWTIME` in the Linux
/// headers; presumably hard-coded because the pinned `libc` version does not
/// export the constant — confirm.
const TIME = 0x80;
/// Unshare the user namespace, so that the calling process is moved into a new
/// user namespace which is not shared with any previously existing process. The
/// caller obtains a full set of capabilities in the new namespace.
///
/// Requires that the calling process is not threaded; specifying this flag
/// automatically implies [`libc::CLONE_THREAD`] and [`Namespaces::FS`] as well.
const USER = libc::CLONE_NEWUSER;
/// Unshare the UTS namespace, so that the calling process has a private
/// copy of the UTS namespace which is not shared with any other process.
const UTS = libc::CLONE_NEWUTS;
/// Unshare System V semaphore adjustment (semadj) values, so that the calling
/// process has a new empty semadj list that is not shared with any other
/// process. If this is the last process that has a reference to the process's
/// current semadj list, then the adjustments in that list are applied to the
/// corresponding semaphores.
const SYSVSEM = libc::CLONE_SYSVSEM;
}
}
Loading

0 comments on commit 6f82216

Please sign in to comment.