Overlay mount xen-tools into user app container
Starting user app containers by bind-mounting the xen-tools rootfs doesn't
provide sufficient isolation: when other files are mounted on top of the
xen-tools rootfs, containerd creates stubs for those files in the original
rootfs even though it is mounted read-only.

To provide better isolation, we mount the xen-tools rootfs into the
container through overlayfs. For this we first create an empty snapshot in
containerd, which provides the upper and work directories of the overlayfs.
We then mount the xen-tools rootfs as the read-only lower layer at the root
of the container, so the stubs created by containerd end up in the snapshot
instead of the original rootfs.
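
As a rough illustration (not part of the commit message itself), the resulting rootfs mount in the OCI runtime spec looks like the sketch below. The helper name and the snapshot/volume paths are placeholders; the mount fields mirror the overlay mount constructed in pkg/pillar/containerd/oci.go further down.

package main

import (
	"fmt"

	specs "github.com/opencontainers/runtime-spec/specs-go"
)

// overlayRootfsMount builds the overlay mount that places the read-only
// xen-tools rootfs at "/" while redirecting all writes into the empty
// containerd snapshot (upper and work directories).
func overlayRootfsMount(snapshotPath, xenToolsVolume string) specs.Mount {
	return specs.Mount{
		Type:        "overlay",
		Source:      "overlay",
		Destination: "/",
		Options: []string{
			"index=off",
			"workdir=" + snapshotPath + "/work",      // overlayfs scratch space inside the snapshot
			"upperdir=" + snapshotPath + "/fs",       // writable layer; containerd stubs land here
			"lowerdir=" + xenToolsVolume + "/rootfs", // original xen-tools rootfs stays untouched
		},
	}
}

func main() {
	// placeholder paths purely for illustration
	m := overlayRootfsMount("/run/containerd/snapshots/app1", "/containers/services/xen-tools")
	fmt.Printf("%+v\n", m)
}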

Since the snapshot is created explicitly by us and protected from being
garbage collected, we also delete it explicitly when the container is
deleted.
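
A minimal sketch of that cleanup step, assuming (as in this patch) that the snapshot is keyed by the container name; the deleteAppSnapshot helper and the snapshotter name are illustrative, not part of the commit:

package main

import (
	"context"

	"github.com/containerd/containerd"
	"github.com/containerd/containerd/errdefs"
)

// deleteAppSnapshot removes the per-container snapshot after the container is
// deleted; the gc.root label set at creation time keeps containerd from
// garbage collecting it on its own, so it must be removed explicitly.
func deleteAppSnapshot(ctx context.Context, client *containerd.Client, snapshotterName, containerName string) error {
	err := client.SnapshotService(snapshotterName).Remove(ctx, containerName)
	if err != nil && !errdefs.IsNotFound(err) {
		return err
	}
	return nil
}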

Signed-off-by: Paul Gaiduk <[email protected]>
(cherry picked from commit df0ce58)
europaul authored and eriknordmark committed Nov 6, 2024
1 parent 3dc5655 commit 4ba9db8
Showing 7 changed files with 187 additions and 63 deletions.
84 changes: 64 additions & 20 deletions pkg/pillar/containerd/containerd.go
@@ -21,6 +21,7 @@ import (
"github.com/containerd/containerd/api/services/tasks/v1"
"github.com/containerd/containerd/cio"
"github.com/containerd/containerd/content"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/images"
"github.com/containerd/containerd/leases"
"github.com/containerd/containerd/mount"
@@ -34,12 +35,11 @@ import (
"github.com/lf-edge/eve/pkg/pillar/utils/persist"
"github.com/opencontainers/go-digest"
"github.com/opencontainers/image-spec/identity"
"github.com/opencontainers/runtime-spec/specs-go"
runtimespecs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/vishvananda/netlink"

v1stat "github.com/containerd/cgroups/stats/v1"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
spec "github.com/opencontainers/image-spec/specs-go/v1"
imagespecs "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/sirupsen/logrus"
)

@@ -179,7 +179,7 @@ func (client *Client) CtrWriteBlob(ctx context.Context, blobHash string, expecte
return fmt.Errorf("CtrWriteBlob: exception while validating hash format of %s. %v", blobHash, err)
}
if err := content.WriteBlob(ctx, client.contentStore, blobHash, reader,
spec.Descriptor{Digest: expectedDigest, Size: int64(expectedSize)}); err != nil {
imagespecs.Descriptor{Digest: expectedDigest, Size: int64(expectedSize)}); err != nil {
return fmt.Errorf("CtrWriteBlob: Exception while writing blob: %s. %s", blobHash, err.Error())
}
return nil
@@ -207,7 +207,7 @@ func (client *Client) CtrReadBlob(ctx context.Context, blobHash string) (io.Read
if err != nil {
return nil, fmt.Errorf("CtrReadBlob: Exception getting info of blob: %s. %s", blobHash, err.Error())
}
readerAt, err := client.contentStore.ReaderAt(ctx, spec.Descriptor{Digest: shaDigest})
readerAt, err := client.contentStore.ReaderAt(ctx, imagespecs.Descriptor{Digest: shaDigest})
if err != nil {
return nil, fmt.Errorf("CtrReadBlob: Exception while reading blob: %s. %s", blobHash, err.Error())
}
@@ -304,20 +304,48 @@ func (client *Client) CtrDeleteImage(ctx context.Context, reference string) erro
return client.ctrdClient.ImageService().Delete(ctx, reference)
}

// CtrPrepareSnapshot creates snapshot for the given image
// CtrCreateEmptySnapshot creates an empty snapshot with the given snapshotID or returns the existing snapshot if it already exists.
func (client *Client) CtrCreateEmptySnapshot(ctx context.Context, snapshotID string) ([]mount.Mount, error) {
if err := client.verifyCtr(ctx, true); err != nil {
return nil, fmt.Errorf("CtrCreateEmptySnapshot: exception while verifying ctrd client: %s", err.Error())
}
snapshotter := client.ctrdClient.SnapshotService(defaultSnapshotter)
snapshotMount, err := snapshotter.Mounts(ctx, snapshotID)
if errdefs.IsNotFound(err) {
logrus.Debugf("Snapshot %s does not exist, creating it", snapshotID)
snapshotMount, err = client.CtrPrepareSnapshot(ctx, snapshotID, nil)
if err != nil {
return nil, err
}
} else if err != nil {
return nil, err
} else {
logrus.Debugf("Snapshot %s already exists, reusing it", snapshotID)
}
return snapshotMount, nil
}

// CtrPrepareSnapshot creates snapshot for the given image or a clean one if no image is provided.
func (client *Client) CtrPrepareSnapshot(ctx context.Context, snapshotID string, image containerd.Image) ([]mount.Mount, error) {
if err := client.verifyCtr(ctx, true); err != nil {
return nil, fmt.Errorf("CtrPrepareSnapshot: exception while verifying ctrd client: %s", err.Error())
}
// use rootfs unpacked image to create a writable snapshot with default snapshotter
diffIDs, err := image.RootFS(ctx)
if err != nil {
err = fmt.Errorf("CtrPrepareSnapshot: Could not load rootfs of image: %v. %v", image.Name(), err)
return nil, err

var parent string
if image == nil {
// create a clean writable snapshot if no image is provided
parent = ""
} else {
// use rootfs unpacked image to create a writable snapshot with default snapshotter
diffIDs, err := image.RootFS(ctx)
if err != nil {
err = fmt.Errorf("CtrPrepareSnapshot: Could not load rootfs of image: %v. %v", image.Name(), err)
return nil, err
}
parent = identity.ChainID(diffIDs).String()
}

snapshotter := client.ctrdClient.SnapshotService(defaultSnapshotter)
parent := identity.ChainID(diffIDs).String()
labels := map[string]string{"containerd.io/gc.root": time.Now().UTC().Format(time.RFC3339)}
return snapshotter.Prepare(ctx, snapshotID, parent, snapshots.WithLabels(labels))
}
@@ -354,6 +382,22 @@ func (client *Client) CtrListSnapshotInfo(ctx context.Context) ([]snapshots.Info
return snapshotInfoList, nil
}

// CtrSnapshotExists checks if a snapshot with the given snapshotName exists in containerd's snapshot store
func (client *Client) CtrSnapshotExists(ctx context.Context, snapshotName string) (bool, error) {
if err := client.verifyCtr(ctx, true); err != nil {
return false, err
}

snapshotter := client.ctrdClient.SnapshotService(defaultSnapshotter)
_, err := snapshotter.Stat(ctx, snapshotName)
if errdefs.IsNotFound(err) {
return false, nil
} else if err != nil {
return false, err
}
return true, nil
}

// CtrGetSnapshotUsage returns snapshot's usage for snapshotID present in containerd's snapshot store
func (client *Client) CtrGetSnapshotUsage(ctx context.Context, snapshotID string) (*snapshots.Usage, error) {
if err := client.verifyCtr(ctx, true); err != nil {
@@ -546,7 +590,7 @@ func (client *Client) CtrListTaskIds(ctx context.Context) ([]string, error) {
}

// CtrNewContainer starts a new container with a specific spec and specOpts
func (client *Client) CtrNewContainer(ctx context.Context, spec specs.Spec, specOpts []oci.SpecOpts, name string, containerImage containerd.Image) (containerd.Container, error) {
func (client *Client) CtrNewContainer(ctx context.Context, spec runtimespecs.Spec, specOpts []oci.SpecOpts, name string, containerImage containerd.Image) (containerd.Container, error) {

opts := []containerd.NewContainerOpts{
containerd.WithImage(containerImage),
@@ -560,13 +604,13 @@ func (client *Client) CtrNewContainer(ctx context.Context, spec specs.Spec, spec

// CtrNewContainerWithPersist starts a new container with /persist mounted
func (client *Client) CtrNewContainerWithPersist(ctx context.Context, name string, containerImage containerd.Image) (containerd.Container, error) {
var spec specs.Spec
var spec runtimespecs.Spec

spec.Root = &specs.Root{
spec.Root = &runtimespecs.Root{
Readonly: false,
}

mount := specs.Mount{
mount := runtimespecs.Mount{
Destination: "/persist",
Type: "bind",
Source: "/persist",
@@ -575,7 +619,7 @@ func (client *Client) CtrNewContainerWithPersist(ctx context.Context, name strin
specOpts := []oci.SpecOpts{
oci.WithDefaultSpec(),
oci.WithImageConfig(containerImage),
oci.WithMounts([]specs.Mount{mount}),
oci.WithMounts([]runtimespecs.Mount{mount}),
oci.WithDefaultUnixDevices,
}

@@ -808,7 +852,7 @@ func prepareProcess(pid int, VifList []types.VifInfo) error {
logrus.Infof("prepareProcess(%d, %v)", pid, VifList)
for _, iface := range VifList {
if iface.Vif == "" {
return fmt.Errorf("Interface requires a name")
return fmt.Errorf("interface requires a name")
}

var link netlink.Link
@@ -846,8 +890,8 @@ func prepareProcess(pid int, VifList []types.VifInfo) error {
return nil
}

func getSavedImageInfo(containerPath string) (ocispec.Image, error) {
var image ocispec.Image
func getSavedImageInfo(containerPath string) (imagespecs.Image, error) {
var image imagespecs.Image

data, err := os.ReadFile(filepath.Join(containerPath, imageConfigFilename))
if err != nil {
59 changes: 49 additions & 10 deletions pkg/pillar/containerd/oci.go
@@ -45,13 +45,14 @@ var dhcpcdScript = []string{"eve", "exec", "pillar", "/opt/zededa/bin/dhcpcd.sh"
// for all the different task usecases
type ociSpec struct {
specs.Spec
name string
client *Client
exposedPorts map[string]struct{}
volumes map[string]struct{}
labels map[string]string
stopSignal string
service bool
name string
client *Client
exposedPorts map[string]struct{}
volumes map[string]struct{}
labels map[string]string
stopSignal string
service bool
containerOpts []containerd.NewContainerOpts
}

// OCISpec provides methods to manipulate OCI runtime specifications and create containers based on them
@@ -114,7 +115,42 @@ func (s *ociSpec) AddLoader(volume string) error {
return err
}

spec.Root = &specs.Root{Readonly: true, Path: filepath.Join(volume, "rootfs")}
// we're gonna use a little hack: since we already have the rootfs of a xen-tools container
// laid out on disk, but don't have it in a form of a snapshot or an image, we're going to
// create an empty snapshot and then overlay the rootfs on top of it - this way we can save
// ourselves copying the rootfs around and still have the newest version of xen-tools on every
// boot, while the original xen-tools rootfs stays read-only

ctrdCtx, done := s.client.CtrNewUserServicesCtx()
defer done()

// create a clean snapshot
snapshotName := s.name
snapshotMount, err := s.client.CtrCreateEmptySnapshot(ctrdCtx, snapshotName)
if err != nil {
return err
}

// remove fs from the end of snapshotMount
snapshotPath := strings.TrimSuffix(snapshotMount[0].Source, "/fs")
logrus.Debugf("Snapshot path: %s", snapshotPath)

xenToolsMount := specs.Mount{
Type: "overlay",
Source: "overlay",
Destination: "/",
Options: []string{
"index=off",
"workdir=" + snapshotPath + "/work",
"upperdir=" + snapshotPath + "/fs",
"lowerdir=" + volume + "/rootfs",
}}

// we need to prepend the loader mount to the existing mounts to make sure it's mounted first because it's the rootfs
spec.Mounts = append([]specs.Mount{xenToolsMount}, spec.Mounts...)

s.containerOpts = append(s.containerOpts, containerd.WithSnapshot(snapshotName))

spec.Linux.Resources = s.Linux.Resources
spec.Linux.CgroupsPath = s.Linux.CgroupsPath

@@ -261,11 +297,14 @@ func (s *ociSpec) Load(file *os.File) error {
func (s *ociSpec) CreateContainer(removeExisting bool) error {
ctrdCtx, done := s.client.CtrNewUserServicesCtx()
defer done()
_, err := s.client.ctrdClient.NewContainer(ctrdCtx, s.name, containerd.WithSpec(&s.Spec))

containerOpts := append(s.containerOpts, containerd.WithSpec(&s.Spec))

_, err := s.client.ctrdClient.NewContainer(ctrdCtx, s.name, containerOpts...)
// if container exists, is stopped and we are asked to remove existing - try that
if err != nil && removeExisting {
_ = s.client.CtrDeleteContainer(ctrdCtx, s.name)
_, err = s.client.ctrdClient.NewContainer(ctrdCtx, s.name, containerd.WithSpec(&s.Spec))
_, err = s.client.ctrdClient.NewContainer(ctrdCtx, s.name, containerOpts...)
}
return err
}
