diff --git a/config/config.toml b/config/config.toml index 21385f5d0..3e75d284e 100644 --- a/config/config.toml +++ b/config/config.toml @@ -16,6 +16,7 @@ filesystem_cache_type="" resolve_result_entry=0 debug=false allow_no_verification=true +allow_idmap=true # disable_verification=false # Causes TestRunWithDefaultConfig to break, but # fine to use in /etc/soci-snapshotter-grpc-config.toml diff --git a/config/fs.go b/config/fs.go index 4088d9b6e..05e5978e7 100644 --- a/config/fs.go +++ b/config/fs.go @@ -56,6 +56,7 @@ type FSConfig struct { NoPrometheus bool `toml:"no_prometheus"` MountTimeoutSec int64 `toml:"mount_timeout_sec"` FuseMetricsEmitWaitDurationSec int64 `toml:"fuse_metrics_emit_wait_duration_sec"` + AllowIdMap bool `toml:"allow_idmap" default:"true"` RetryableHTTPClientConfig `toml:"http"` BlobConfig `toml:"blob"` diff --git a/fs/fs.go b/fs/fs.go index 40011bf8e..10072baef 100644 --- a/fs/fs.go +++ b/fs/fs.go @@ -47,7 +47,9 @@ import ( "fmt" golog "log" "net/http" + "os" "os/exec" + "path/filepath" "sync" "syscall" "time" @@ -59,6 +61,7 @@ import ( layermetrics "github.com/awslabs/soci-snapshotter/fs/metrics/layer" "github.com/awslabs/soci-snapshotter/fs/remote" "github.com/awslabs/soci-snapshotter/fs/source" + "github.com/awslabs/soci-snapshotter/idtools" "github.com/awslabs/soci-snapshotter/metadata" "github.com/awslabs/soci-snapshotter/snapshot" "github.com/awslabs/soci-snapshotter/soci" @@ -67,6 +70,7 @@ import ( ctdsnapshotters "github.com/containerd/containerd/pkg/snapshotters" "github.com/containerd/containerd/reference" "github.com/containerd/containerd/remotes/docker" + "github.com/containerd/errdefs" "github.com/containerd/log" metrics "github.com/docker/go-metrics" fusefs "github.com/hanwen/go-fuse/v2/fs" @@ -455,6 +459,79 @@ func (fs *filesystem) getSociContext(ctx context.Context, imageRef, indexDigest, return c, err } +func (fs *filesystem) getIdMappedMountpoint(mountpoint, activeLayerKey string) string { + d := filepath.Dir(mountpoint) + 
return filepath.Join(fmt.Sprintf("%s_%s", d, activeLayerKey), "fs") +} + +func (fs *filesystem) IdMapMount(ctx context.Context, mountpoint, activeLayerKey string, idmapper idtools.IDMap) (string, error) { + newMountpoint := fs.getIdMappedMountpoint(mountpoint, activeLayerKey) + log.G(ctx).WithField("mountpoint", newMountpoint).Info("creating remote id-mapped mount") + if err := os.Mkdir(filepath.Dir(newMountpoint), 0700); err != nil { + return "", err + } + if err := os.Mkdir(newMountpoint, 0755); err != nil { + return "", err + } + + fs.layerMu.Lock() + l := fs.layer[mountpoint] + if l == nil { + fs.layerMu.Unlock() + log.G(ctx).WithField("mountpoint", newMountpoint).Info("failed to create remote id-mapped mount") + return "", errdefs.ErrNotFound + } + fs.layer[newMountpoint] = l + fs.layerMu.Unlock() + node, err := l.RootNode(0, idmapper) + if err != nil { + return "", err + } + + rawFS := fusefs.NewNodeFS(node, &fusefs.Options{ + AttrTimeout: &fs.attrTimeout, + EntryTimeout: &fs.entryTimeout, + NegativeTimeout: &fs.negativeTimeout, + NullPermissions: true, + }) + mountOpts := &fuse.MountOptions{ + AllowOther: true, // allow users other than root&mounter to access fs + FsName: "soci", // name this filesystem as "soci" + Debug: fs.debug, + DisableXAttrs: l.DisableXAttrs(), + Options: []string{"default_permissions", "ro"}, + } + if _, err := exec.LookPath(fusermountBin); err == nil { + mountOpts.Options = append(mountOpts.Options, "suid") // extra option for fusermount (allow setuid inside container); appended so "default_permissions" and "ro" set above are kept + } else { + log.G(ctx).WithError(err).Infof("%s not installed; trying direct mount", fusermountBin) + mountOpts.DirectMount = true + } + server, err := fuse.NewServer(rawFS, newMountpoint, mountOpts) + if err != nil { + log.G(ctx).WithError(err).Debug("failed to make filesystem server") + return "", err + } + + go server.Serve() + + log.G(ctx).WithField("mountpoint", newMountpoint).Info("successfully created remote mountpoint") + return newMountpoint, server.WaitMount() +} + +func (fs 
*filesystem) IdMapMountLocal(ctx context.Context, mountpoint, activeLayerKey string, idmapper idtools.IDMap) (string, error) { + newMountpoint := fs.getIdMappedMountpoint(mountpoint, activeLayerKey) + log.G(ctx).WithField("mountpoint", newMountpoint).Info("creating local id-mapped mount") + + if err := idtools.RemapDir(ctx, mountpoint, newMountpoint, idmapper); err != nil { + log.G(ctx).WithField("mountpoint", newMountpoint).Errorf("failed to create local mount: %v", err) + return "", err + } + + log.G(ctx).WithField("mountpoint", newMountpoint).Info("successfully created local mountpoint") + return newMountpoint, nil +} + func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[string]string) (retErr error) { // Setting the start time to measure the Mount operation duration. start := time.Now() @@ -566,7 +643,7 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s // Maybe we should reword the log here or remove it entirely, // since the old Verify() function no longer serves any purpose. 
- node, err := l.RootNode(0) + node, err := l.RootNode(0, idtools.IDMap{}) if err != nil { log.G(ctx).WithError(err).Warnf("Failed to get root node") retErr = fmt.Errorf("failed to get root node: %w", err) @@ -602,6 +679,7 @@ func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[s Debug: fs.debug, Logger: golog.New(logger, "", 0), DisableXAttrs: l.DisableXAttrs(), + Options: []string{"default_permissions", "ro"}, } if _, err := exec.LookPath(fusermountBin); err == nil { - mountOpts.Options = []string{"suid"} // option for fusermount; allow setuid inside container + mountOpts.Options = append(mountOpts.Options, "suid") // extra option for fusermount (allow setuid inside container); appended so "default_permissions" and "ro" set above are kept @@ -692,7 +770,8 @@ func (fs *filesystem) Unmount(ctx context.Context, mountpoint string) error { l, ok := fs.layer[mountpoint] if !ok { fs.layerMu.Unlock() - return fmt.Errorf("specified path %q isn't a mountpoint", mountpoint) + log.G(ctx).Errorf("specified path %q isn't a remote mount", mountpoint) + return errdefs.ErrNotFound } delete(fs.layer, mountpoint) // unregisters the corresponding layer l.Done() @@ -706,6 +785,10 @@ func (fs *filesystem) Unmount(ctx context.Context, mountpoint string) error { return syscall.Unmount(mountpoint, syscall.MNT_FORCE) } +func (fs *filesystem) UnmountLocal(ctx context.Context, mountpoint string) error { + return syscall.Unmount(mountpoint, syscall.MNT_FORCE) +} + // neighboringLayers returns layer descriptors except the `target` layer in the specified manifest. 
func neighboringLayers(manifest ocispec.Manifest, target ocispec.Descriptor) (descs []ocispec.Descriptor) { for _, desc := range manifest.Layers { diff --git a/fs/fs_test.go b/fs/fs_test.go index 2591a270a..119ec9c2c 100644 --- a/fs/fs_test.go +++ b/fs/fs_test.go @@ -46,6 +46,7 @@ import ( "github.com/awslabs/soci-snapshotter/fs/layer" "github.com/awslabs/soci-snapshotter/fs/remote" "github.com/awslabs/soci-snapshotter/fs/source" + "github.com/awslabs/soci-snapshotter/idtools" "github.com/containerd/containerd/reference" "github.com/containerd/containerd/remotes/docker" fusefs "github.com/hanwen/go-fuse/v2/fs" @@ -83,10 +84,12 @@ func (l *breakableLayer) Info() layer.Info { Size: 1, } } -func (l *breakableLayer) DisableXAttrs() bool { return false } -func (l *breakableLayer) RootNode(uint32) (fusefs.InodeEmbedder, error) { return nil, nil } -func (l *breakableLayer) Verify(tocDigest digest.Digest) error { return nil } -func (l *breakableLayer) SkipVerify() {} +func (l *breakableLayer) DisableXAttrs() bool { return false } +func (l *breakableLayer) RootNode(uint32, idtools.IDMap) (fusefs.InodeEmbedder, error) { + return nil, nil +} +func (l *breakableLayer) Verify(tocDigest digest.Digest) error { return nil } +func (l *breakableLayer) SkipVerify() {} func (l *breakableLayer) ReadAt([]byte, int64, ...remote.Option) (int, error) { return 0, fmt.Errorf("fail") } diff --git a/fs/layer/layer.go b/fs/layer/layer.go index 91479ea84..778278bf7 100644 --- a/fs/layer/layer.go +++ b/fs/layer/layer.go @@ -58,6 +58,7 @@ import ( "github.com/awslabs/soci-snapshotter/fs/remote" spanmanager "github.com/awslabs/soci-snapshotter/fs/span-manager" + "github.com/awslabs/soci-snapshotter/idtools" "github.com/awslabs/soci-snapshotter/metadata" "github.com/awslabs/soci-snapshotter/soci" "github.com/awslabs/soci-snapshotter/util/lrucache" @@ -86,7 +87,7 @@ type Layer interface { Info() Info // RootNode returns the root node of this layer. 
- RootNode(baseInode uint32) (fusefs.InodeEmbedder, error) + RootNode(baseInode uint32, idMapper idtools.IDMap) (fusefs.InodeEmbedder, error) // Check checks if the layer is still connectable. Check() error @@ -489,14 +490,14 @@ func (l *layerRef) Done() { l.done() } -func (l *layer) RootNode(baseInode uint32) (fusefs.InodeEmbedder, error) { +func (l *layer) RootNode(baseInode uint32, idMapper idtools.IDMap) (fusefs.InodeEmbedder, error) { if l.isClosed() { return nil, fmt.Errorf("layer is already closed") } if l.r == nil { return nil, fmt.Errorf("layer hasn't been verified yet") } - return newNode(l.desc.Digest, l.r, l.blob, baseInode, l.resolver.overlayOpaqueType, l.resolver.config.LogFuseOperations, l.fuseOperationCounter) + return newNode(l.desc.Digest, l.r, l.blob, baseInode, l.resolver.overlayOpaqueType, l.resolver.config.LogFuseOperations, l.fuseOperationCounter, idMapper) } func (l *layer) ReadAt(p []byte, offset int64, opts ...remote.Option) (int, error) { diff --git a/fs/layer/node.go b/fs/layer/node.go index 6459c1a48..bbb813f79 100644 --- a/fs/layer/node.go +++ b/fs/layer/node.go @@ -56,6 +56,7 @@ import ( commonmetrics "github.com/awslabs/soci-snapshotter/fs/metrics/common" "github.com/awslabs/soci-snapshotter/fs/reader" "github.com/awslabs/soci-snapshotter/fs/remote" + "github.com/awslabs/soci-snapshotter/idtools" "github.com/awslabs/soci-snapshotter/metadata" "github.com/containerd/log" fusefs "github.com/hanwen/go-fuse/v2/fs" @@ -189,7 +190,7 @@ func (f *FuseOperationCounter) Run(ctx context.Context) { // logFSOperations may cause sensitive information to be emitted to logs // e.g. 
filenames and paths within an image -func newNode(layerDgst digest.Digest, r reader.Reader, blob remote.Blob, baseInode uint32, opaque OverlayOpaqueType, logFSOperations bool, opCounter *FuseOperationCounter) (fusefs.InodeEmbedder, error) { +func newNode(layerDgst digest.Digest, r reader.Reader, blob remote.Blob, baseInode uint32, opaque OverlayOpaqueType, logFSOperations bool, opCounter *FuseOperationCounter, idMapper idtools.IDMap) (fusefs.InodeEmbedder, error) { rootID := r.Metadata().RootID() rootAttr, err := r.Metadata().GetAttr(rootID) if err != nil { @@ -210,9 +211,10 @@ func newNode(layerDgst digest.Digest, r reader.Reader, blob remote.Blob, baseIno } ffs.s = ffs.newState(layerDgst, blob) return &node{ - id: rootID, - attr: rootAttr, - fs: ffs, + id: rootID, + attr: rootAttr, + fs: ffs, + idMapper: idMapper, }, nil } @@ -272,9 +274,10 @@ func (fs *fs) inodeOfID(id uint32) (uint64, error) { // node is a filesystem inode abstraction. type node struct { fusefs.Inode - fs *fs - id uint32 - attr metadata.Attr + fs *fs + id uint32 + attr metadata.Attr + idMapper idtools.IDMap ents []fuse.DirEntry entsCached bool @@ -407,14 +410,14 @@ func (n *node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fu n.fs.reportFailure(fuseOpLookup, fmt.Errorf("%s: %v", fuseOpLookup, err)) return nil, syscall.EIO } - entryToAttr(ino, tn.attr, &out.Attr) + n.entryToAttr(ino, tn.attr, &out.Attr) case *whiteout: ino, err := n.fs.inodeOfID(tn.id) if err != nil { n.fs.reportFailure(fuseOpLookup, fmt.Errorf("%s: %v", fuseOpLookup, err)) return nil, syscall.EIO } - entryToAttr(ino, tn.attr, &out.Attr) + n.entryToAttr(ino, tn.attr, &out.Attr) default: n.fs.reportFailure(fuseOpLookup, fmt.Errorf("%s: unknown node type detected", fuseOpLookup)) return nil, syscall.EIO @@ -463,10 +466,11 @@ func (n *node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fu return nil, syscall.EIO } return n.NewInode(ctx, &node{ - id: id, - fs: n.fs, - attr: ce, - }, 
entryToAttr(ino, ce, &out.Attr)), 0 + id: id, + fs: n.fs, + attr: ce, + idMapper: n.idMapper, + }, n.entryToAttr(ino, ce, &out.Attr)), 0 } var _ = (fusefs.NodeOpener)((*node)(nil)) @@ -495,7 +499,7 @@ func (n *node) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrO n.fs.reportFailure(fuseOpGetattr, fmt.Errorf("%s: %v", fuseOpGetattr, err)) return syscall.EIO } - entryToAttr(ino, n.attr, &out.Attr) + n.entryToAttr(ino, n.attr, &out.Attr) return 0 } @@ -594,7 +598,7 @@ func (f *file) Getattr(ctx context.Context, out *fuse.AttrOut) syscall.Errno { f.n.fs.reportFailure(fuseOpFileGetattr, fmt.Errorf("%s: %v", fuseOpFileGetattr, err)) return syscall.EIO } - entryToAttr(ino, f.n.attr, &out.Attr) + f.n.entryToAttr(ino, f.n.attr, &out.Attr) return 0 } @@ -797,7 +801,7 @@ func (sf *statFile) updateStatUnlocked() ([]byte, error) { } // entryToAttr converts metadata.Attr to go-fuse's Attr. -func entryToAttr(ino uint64, e metadata.Attr, out *fuse.Attr) fusefs.StableAttr { +func (n *node) entryToAttr(ino uint64, e metadata.Attr, out *fuse.Attr) fusefs.StableAttr { out.Ino = ino out.Size = uint64(e.Size) if e.Mode&os.ModeSymlink != 0 { @@ -808,7 +812,9 @@ func entryToAttr(ino uint64, e metadata.Attr, out *fuse.Attr) fusefs.StableAttr mtime := e.ModTime out.SetTimes(nil, &mtime, nil) out.Mode = fileModeToSystemMode(e.Mode) - out.Owner = fuse.Owner{Uid: uint32(e.UID), Gid: uint32(e.GID)} + // Potentially dangerous casting int -> uint32? But probably fine. 
+ mappedId, _ := n.idMapper.ToHost(idtools.User{Uid: uint32(e.UID), Gid: uint32(e.GID)}) + out.Owner = fuse.Owner{Uid: mappedId.Uid, Gid: mappedId.Gid} out.Rdev = uint32(unix.Mkdev(uint32(e.DevMajor), uint32(e.DevMinor))) out.Nlink = uint32(e.NumLink) if out.Nlink == 0 { diff --git a/fs/layer/node_test.go b/fs/layer/node_test.go index 432b3b66e..9f12e6810 100644 --- a/fs/layer/node_test.go +++ b/fs/layer/node_test.go @@ -50,7 +50,8 @@ func TestEntryToAttr(t *testing.T) { tc := tc t.Run(tc.name, func(t *testing.T) { var actual fuse.Attr - entryToAttr(0, tc.attr, &actual) + var n node + n.entryToAttr(0, tc.attr, &actual) tc.expected.Mtime = actual.Mtime if actual != tc.expected { t.Fatalf("unexpected fuse attr. actual %v expected %v", actual, tc.expected) diff --git a/fs/layer/util_test.go b/fs/layer/util_test.go index 2271f5556..c6134308e 100644 --- a/fs/layer/util_test.go +++ b/fs/layer/util_test.go @@ -56,6 +56,7 @@ import ( "github.com/awslabs/soci-snapshotter/fs/reader" "github.com/awslabs/soci-snapshotter/fs/remote" spanmanager "github.com/awslabs/soci-snapshotter/fs/span-manager" + "github.com/awslabs/soci-snapshotter/idtools" "github.com/awslabs/soci-snapshotter/metadata" "github.com/awslabs/soci-snapshotter/util/testutil" "github.com/awslabs/soci-snapshotter/ztoc" @@ -362,7 +363,7 @@ func hasSize(name string, size int) check { } func getRootNode(t *testing.T, r reader.Reader, opaque OverlayOpaqueType) *node { - rootNode, err := newNode(testStateLayerDigest, &testReader{r}, &testBlobState{10, 5}, 100, opaque, false, nil) + rootNode, err := newNode(testStateLayerDigest, &testReader{r}, &testBlobState{10, 5}, 100, opaque, false, nil, idtools.IDMap{}) if err != nil { t.Fatalf("failed to get root node: %v", err) } diff --git a/fs/source/source.go b/fs/source/source.go index ce394d840..9da0b36b2 100644 --- a/fs/source/source.go +++ b/fs/source/source.go @@ -84,6 +84,9 @@ const ( // TargetSociIndexDigestLabel is a label which contains the digest of the soci index. 
TargetSociIndexDigestLabel = "containerd.io/snapshot/remote/soci.index.digest" + + // HasSociIndexDigest is a label that tells if the layer was pulled with a SOCI index. + HasSociIndexDigest = "containerd.io/snapshot/remote/has.soci.index.digest" ) // RegistryHosts is copied from [github.com/awslabs/soci-snapshotter/service/resolver.RegistryHosts] diff --git a/go.mod b/go.mod index c0f49bdd3..a95db767b 100644 --- a/go.mod +++ b/go.mod @@ -27,6 +27,7 @@ require ( github.com/prometheus/client_golang v1.20.4 github.com/rs/xid v1.6.0 github.com/sirupsen/logrus v1.9.3 + github.com/stretchr/testify v1.9.0 go.etcd.io/bbolt v1.3.11 golang.org/x/crypto v0.27.0 golang.org/x/sync v0.8.0 @@ -90,7 +91,6 @@ require ( github.com/prometheus/common v0.55.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/stretchr/testify v1.9.0 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect go.opentelemetry.io/otel v1.21.0 // indirect diff --git a/idtools/idmap.go b/idtools/idmap.go index be34a0f0e..9e82ec5a8 100644 --- a/idtools/idmap.go +++ b/idtools/idmap.go @@ -34,9 +34,17 @@ package idtools import ( + "context" + "encoding/json" "errors" "fmt" + "os" + "os/exec" + "path/filepath" + "syscall" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/snapshots" "github.com/opencontainers/runtime-spec/specs-go" ) @@ -60,6 +68,21 @@ type IDMap struct { GidMap []specs.LinuxIDMapping `json:"GidMap"` } +func LoadIdMap(id string, labels map[string]string) (IDMap, error) { + var idmap IDMap + uidmapJson, okUid := labels[snapshots.LabelSnapshotUIDMapping] + gidmapJson, okGid := labels[snapshots.LabelSnapshotGIDMapping] + if okUid && okGid { + if err := json.Unmarshal([]byte(uidmapJson), &idmap.UidMap); err != nil { + return IDMap{}, err + } + if err := json.Unmarshal([]byte(gidmapJson), &idmap.GidMap); err != nil { + return IDMap{}, 
err + } + } + return idmap, nil +} + // ToHost returns the host user ID pair for the container ID pair. func (i IDMap) ToHost(pair User) (User, error) { var ( @@ -113,3 +136,57 @@ func safeSum(x, y uint32) (uint32, error) { } return z, nil } + +// RemapDir copies originalMountpoint into the parent directory of newMountpoint +// and then chowns everything under newMountpoint to the host IDs given by idMap. +func RemapDir(ctx context.Context, originalMountpoint, newMountpoint string, idMap IDMap) error { + idmappedSnapshotBase := filepath.Dir(newMountpoint) + if err := os.Mkdir(idmappedSnapshotBase, 0755); err != nil { + return err + } + + // cp -a keeps each file's owner and mode; chown below maps the IDs it reads back from the copy. + if err := exec.CommandContext(ctx, "cp", "-a", originalMountpoint, idmappedSnapshotBase).Run(); err != nil { + // if err := continuity.CopyDir(newMountpoint, originalMountpoint); err != nil { + return err + } + return filepath.Walk(newMountpoint, chown(idMap)) +} + +// RemapRoot chowns every entry under root to the host IDs given by idMap. +func RemapRoot(ctx context.Context, root string, idMap IDMap) error { + return filepath.Walk(root, chown(idMap)) +} + +// RemapRootFS runs the chown walk on the root that mount.WithTempMount provides for mounts. +func RemapRootFS(ctx context.Context, mounts []mount.Mount, idmap IDMap) error { + return mount.WithTempMount(ctx, mounts, func(root string) error { + return filepath.Walk(root, chown(idmap)) + }) +} + +// chown returns a WalkFunc that lchowns each visited path to the host IDs idMap gives for its current owner. +func chown(idMap IDMap) filepath.WalkFunc { + return func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + stat, ok := info.Sys().(*syscall.Stat_t) + if !ok { + return fmt.Errorf("unexpected stat type %T for %q", info.Sys(), path) + } + h, cerr := idMap.ToHost(User{Uid: stat.Uid, Gid: stat.Gid}) + if cerr != nil { + return cerr + } + // be sure to lchown the path so as not to de-reference a symlink to a host file + if cerr = os.Lchown(path, int(h.Uid), int(h.Gid)); cerr != nil { + return cerr + } + // we must retain special permissions such as setuid, setgid and sticky bits + if mode := info.Mode(); mode&os.ModeSymlink == 0 && mode&(os.ModeSetuid|os.ModeSetgid|os.ModeSticky) != 0 { + return os.Chmod(path, mode) + } + return nil + } +} diff --git a/integration/run_test.go b/integration/run_test.go index 260eb7a56..dc7bb72db 100644 --- a/integration/run_test.go +++ b/integration/run_test.go @@ -37,6 +37,7 @@ import ( "bytes" "fmt" "os" + "path/filepath" "regexp" "strconv" "strings" 
@@ -520,3 +521,132 @@ func TestRunInNamespace(t *testing.T) { } } } + +func TestRunWithIdMap(t *testing.T) { + tests := []struct { + name string + imageName string + indexBuilderFn func(sh *shell.Shell, src imageInfo, opts ...indexBuildOption) string + remapUser string + remapGroup string + remapUid string + remapGid string + checkLocation string + expectedOwner string + }{ + { + name: "with only FUSE layers", + imageName: rabbitmqImage, + indexBuilderFn: func(sh *shell.Shell, src imageInfo, opts ...indexBuildOption) string { + opts = append(opts, withMinLayerSize(0)) + return buildIndex(sh, src, opts...) + }, + remapUser: "dummy-user", + remapGroup: "dummy-group", + remapUid: "123456", + remapGid: "123456", + checkLocation: "usr", + expectedOwner: "123456", + }, + { + name: "with mixed layers", + imageName: rabbitmqImage, + indexBuilderFn: func(sh *shell.Shell, src imageInfo, opts ...indexBuildOption) string { + return buildIndex(sh, src, opts...) + }, + remapUser: "dummy-user", + remapGroup: "dummy-group", + remapUid: "123456", + remapGid: "123456", + checkLocation: "usr", + expectedOwner: "123456", + }, + { + name: "with no SOCI index", + imageName: rabbitmqImage, + indexBuilderFn: func(sh *shell.Shell, src imageInfo, opts ...indexBuildOption) string { + return "" + }, + remapUser: "dummy-user", + remapGroup: "dummy-group", + remapUid: "123456", + remapGid: "123456", + checkLocation: "usr", + expectedOwner: "123456", + }, + } + + baseSnapshotDir := "/var/lib/soci-snapshotter-grpc/snapshotter/snapshots" + baseRuntimeDir := "/run/containerd/io.containerd.runtime.v2.task/default" + testContainerName := "testidmap" + uidPath := "/etc/subuid" + gidPath := "/etc/subgid" + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + regConfig := newRegistryConfig() + sh, done := newShellWithRegistry(t, regConfig) + defer done() + + sh.X("groupadd", "-g", tt.remapGid, tt.remapGroup) + sh.X("useradd", "-u", tt.remapUid, "-g", tt.remapGid, "-m", tt.remapUser) + + 
subUidFile := fmt.Sprintf("%s:%s:%s", tt.remapUser, tt.remapUid, "1000") + subGidFile := fmt.Sprintf("%s:%s:%s", tt.remapUser, tt.remapGid, "1000") + sh.Pipe(nil, shell.C("echo", subUidFile), shell.C("tee", uidPath)) + sh.Pipe(nil, shell.C("echo", subGidFile), shell.C("tee", gidPath)) + + rebootContainerd(t, sh, "", getSnapshotterConfigToml(t, false)) + imageInfo := dockerhub(tt.imageName) + sh.X("nerdctl", "pull", "-q", tt.imageName) + + filenames, err := sh.OLog("ls", baseSnapshotDir) + if err != nil { + t.Fatalf("error listing files in %s", baseSnapshotDir) + } + + // Copy image, remove blobs, and re-pull with SOCI + copyImage(sh, dockerhub(tt.imageName), regConfig.mirror(tt.imageName)) + indexDigest := tt.indexBuilderFn(sh, regConfig.mirror(tt.imageName)) + if indexDigest != "" { + sh.X("soci", "push", "--user", regConfig.creds(), regConfig.mirror(tt.imageName).ref) + } + sh.X("rm", "-rf", filepath.Join(store.DefaultSociContentStorePath, "blobs", "sha256")) + + pullCmd := imagePullCmd + if indexDigest != "" { + pullCmd = append(pullCmd, "--soci-index-digest", indexDigest) + } + sh.X(append(pullCmd, regConfig.mirror(tt.imageName).ref)...) 
+ // time.Sleep(999999999999999999) + sh.X("ctr-with-idmapping", "run", "-d", + "--remap-labels", + "--userns-remap", tt.remapUser, + "--snapshotter", "soci", + imageInfo.ref, testContainerName, "sleep", "infinity", + ) + + newFilenames, err := sh.OLog("ls", baseSnapshotDir) + if err != nil { + t.Fatalf("error listing files in %s", baseSnapshotDir) + } + + if len(filenames) == len(newFilenames) { + t.Fatalf("error: id-mapping failed") + } + + fullCheckPath := filepath.Join(baseRuntimeDir, testContainerName, "rootfs", tt.checkLocation) + stat, err := sh.OLog("stat", fullCheckPath) + if err != nil { + t.Fatalf("error stat files in %s", fullCheckPath) + } + + strStat := string(stat) + t.Log(strStat) + matchUid := fmt.Sprintf("Uid: (%s", tt.expectedOwner) + if !strings.Contains(strStat, matchUid) { + t.Fatalf("error: file %s did not have uid %s", tt.checkLocation, tt.expectedOwner) + } + }) + } +} diff --git a/service/service.go b/service/service.go index 167168e09..f627508d4 100644 --- a/service/service.go +++ b/service/service.go @@ -119,6 +119,9 @@ func NewSociSnapshotterService(ctx context.Context, root string, serviceCfg *con if serviceCfg.SnapshotterConfig.AllowInvalidMountsOnRestart { snOpts = append(snOpts, snbase.AllowInvalidMountsOnRestart) } + if serviceCfg.FSConfig.AllowIdMap { + snOpts = append(snOpts, snbase.AllowIdMap) + } snapshotter, err = snbase.NewSnapshotter(ctx, snapshotterRoot(root), fs, snOpts...) 
if err != nil { diff --git a/snapshot/snapshot.go b/snapshot/snapshot.go index f770b5b5b..a0bc004c6 100644 --- a/snapshot/snapshot.go +++ b/snapshot/snapshot.go @@ -44,6 +44,7 @@ import ( commonmetrics "github.com/awslabs/soci-snapshotter/fs/metrics/common" "github.com/awslabs/soci-snapshotter/fs/source" + "github.com/awslabs/soci-snapshotter/idtools" "github.com/containerd/containerd/mount" ctdsnapshotters "github.com/containerd/containerd/pkg/snapshotters" "github.com/containerd/containerd/snapshots" @@ -104,7 +105,10 @@ type FileSystem interface { Mount(ctx context.Context, mountpoint string, labels map[string]string) error Check(ctx context.Context, mountpoint string, labels map[string]string) error Unmount(ctx context.Context, mountpoint string) error + UnmountLocal(ctx context.Context, mountpoint string) error MountLocal(ctx context.Context, mountpoint string, labels map[string]string, mounts []mount.Mount) error + IdMapMount(ctx context.Context, mountpoint, activeLayerKey string, idmap idtools.IDMap) (string, error) + IdMapMountLocal(ctx context.Context, mountpoint, activeLayerKey string, idmap idtools.IDMap) (string, error) } // SnapshotterConfig is used to configure the remote snapshotter instance @@ -113,6 +117,7 @@ type SnapshotterConfig struct { // minLayerSize skips remote mounting of smaller layers minLayerSize int64 allowInvalidMountsOnRestart bool + allowIdMap bool } // Opt is an option to configure the remote snapshotter @@ -140,6 +145,11 @@ func AllowInvalidMountsOnRestart(config *SnapshotterConfig) error { return nil } +func AllowIdMap(config *SnapshotterConfig) error { + config.allowIdMap = true + return nil +} + type snapshotter struct { root string ms *storage.MetaStore @@ -150,6 +160,8 @@ type snapshotter struct { userxattr bool // whether to enable "userxattr" mount option minLayerSize int64 // minimum layer size for remote mounting allowInvalidMountsOnRestart bool + allowIdMap bool + idmapped map[string]interface{} } // NewSnapshotter 
returns a Snapshotter which can use unpacked remote layers @@ -200,6 +212,8 @@ func NewSnapshotter(ctx context.Context, root string, targetFs FileSystem, opts userxattr: userxattr, minLayerSize: config.minLayerSize, allowInvalidMountsOnRestart: config.allowInvalidMountsOnRestart, + allowIdMap: config.allowIdMap, + idmapped: make(map[string]interface{}), } if err := o.restoreRemoteSnapshot(ctx); err != nil { @@ -285,6 +299,48 @@ func (o *snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, e return usage, nil } +func (o *snapshotter) setupIdMap(ctx context.Context, s storage.Snapshot, parent string, labels map[string]string) error { + // load id-map if appropriate labels are present. + idmap, err := idtools.LoadIdMap(s.ID, labels) + if err != nil { + log.G(ctx).Errorf("failed to load id-map: %v", err) + return err + } + + if !idmap.Empty() { + parentSnapshot, err := o.Stat(ctx, parent) + if err != nil { + log.G(ctx).Errorf("failed to stat parent snapshot: %v", err) + return err + } + + // If there is no SOCI index, you can safely mount from the root without copying over every single layer + if _, ok := parentSnapshot.Labels[source.HasSociIndexDigest]; !ok { + // Fallback to overlay + log.G(ctx).Info("no SOCI index found, remapping from root") + mounts, err := o.mounts(ctx, s, parent) + if err != nil { + return err + } + + err = idtools.RemapRootFS(ctx, mounts, idmap) + if err != nil { + return err + } + } else { + o.idmapped[s.ID] = struct{}{} + err = o.createIdMapMounts(ctx, s, idmap) + if err != nil { + log.G(ctx).Errorf("failed to create id-mapped mounts: %v", err) + return err + } + } + + log.G(ctx).Info("id-mapping successful") + } + return nil +} + func (o *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) { log.G(ctx).WithField("key", key).WithField("parent", parent).Debug("prepare") s, err := o.createSnapshot(ctx, snapshots.KindActive, key, parent, opts) @@ -302,7 +358,15 @@ func (o 
*snapshotter) Prepare(ctx context.Context, key, parent string, opts ...s } target, ok := base.Labels[targetSnapshotLabel] + // !ok means we are in an active snapshot if !ok { + // Setup id-mapped mounts if config allows. + // Any error here needs to stop the container from starting. + if o.allowIdMap { + if err := o.setupIdMap(ctx, s, parent, base.Labels); err != nil { + return nil, err + } + } return o.mounts(ctx, s, parent) } @@ -319,7 +383,8 @@ func (o *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...s if !o.skipRemoteSnapshotPrepare(lCtx, base.Labels) { err := o.prepareRemoteSnapshot(lCtx, key, base.Labels) if err == nil { - base.Labels[remoteLabel] = remoteLabelVal // Mark this snapshot as remote + base.Labels[remoteLabel] = remoteLabelVal // Mark this snapshot as remote + base.Labels[source.HasSociIndexDigest] = "true" // Mark that this snapshot was loaded with a SOCI index err := o.commit(ctx, true, target, key, append(opts, snapshots.WithLabels(base.Labels))...) if err == nil || errdefs.IsAlreadyExists(err) { // count also AlreadyExists as "success" @@ -361,6 +426,7 @@ func (o *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...s log.G(ctx).WithField("layerDigest", base.Labels[ctdsnapshotters.TargetLayerDigestLabel]).Info("preparing snapshot as local snapshot") err = o.prepareLocalSnapshot(lCtx, key, base.Labels, mounts) if err == nil { + base.Labels[source.HasSociIndexDigest] = "true" // Mark that this snapshot was loaded with a SOCI index err := o.commit(ctx, false, target, key, append(opts, snapshots.WithLabels(base.Labels))...) 
if err == nil || errdefs.IsAlreadyExists(err) { // count also AlreadyExists as "success" @@ -578,7 +644,13 @@ func (o *snapshotter) getCleanupDirectories(ctx context.Context, t storage.Trans cleanup := []string{} for _, d := range dirs { if !cleanupCommitted { - if _, ok := ids[d]; ok { + currDir := d + // Use the id-mapped suffix if present + temp := strings.Split(d, "_") + if len(temp) > 1 { + currDir = temp[1] + } + if _, ok := ids[currDir]; ok { continue } } @@ -613,7 +685,10 @@ func (o *snapshotter) unmountSnapshotDirectory(ctx context.Context, dir string) return err } if mounted { - return o.fs.Unmount(ctx, mp) + if err = o.fs.Unmount(ctx, mp); errdefs.IsNotFound(err) { // assign the outer err (no :=) so other Unmount failures reach the return below + return o.fs.UnmountLocal(ctx, mp) + } + return err } return nil } @@ -757,15 +832,16 @@ func (o *snapshotter) mounts(ctx context.Context, s storage.Snapshot, checkKey s }, nil } - parentPaths := make([]string, len(s.ParentIDs)) - for i := range s.ParentIDs { - parentPaths[i] = o.upperPath(s.ParentIDs[i]) + parentPaths, err := o.getParentPaths(ctx, s) + if err != nil { + return nil, err } options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(parentPaths, ":"))) if o.userxattr { options = append(options, "userxattr") } + return []mount.Mount{ { Type: "overlay", @@ -773,7 +849,50 @@ func (o *snapshotter) mounts(ctx context.Context, s storage.Snapshot, checkKey s Options: options, }, }, nil +} + +func (o *snapshotter) getParentPaths(ctx context.Context, s storage.Snapshot) ([]string, error) { + parentPaths := make([]string, len(s.ParentIDs)) + + for i := range s.ParentIDs { + id := s.ParentIDs[i] + if _, ok := o.idmapped[s.ID]; ok { + id = fmt.Sprintf("%s_%s", s.ParentIDs[i], s.ID) + } + parentPaths[i] = o.upperPath(id) + } + + return parentPaths, nil +} + +func (o *snapshotter) createIdMapMounts(ctx context.Context, s storage.Snapshot, idmap idtools.IDMap) error { + log.G(ctx).Infof("mapping ids") + + for _, id := range s.ParentIDs { + err := o.createIdMapMount(ctx, 
o.upperPath(id), s.ID, idmap) + if err != nil { + return err + } + } + return idtools.RemapRoot(ctx, o.upperPath(s.ID), idmap) +} + +func (o *snapshotter) createIdMapMount(ctx context.Context, path, id string, idmap idtools.IDMap) error { + // s.ID is the shortest unique identifier for each new container, + // so append it to the end of the new mountpoint + _, err := o.fs.IdMapMount(ctx, path, id, idmap) + if errdefs.IsNotFound(err) { + // Remote mount failed, attempt to create a local id-mapped mount + + // Cleanup dirty snapshot folder — perhaps we can have a return cleanup func? + dirtyDir := fmt.Sprintf("%s_%s", filepath.Dir(path), id) + if err := os.RemoveAll(dirtyDir); err != nil { + return err + } + _, err = o.fs.IdMapMountLocal(ctx, path, id, idmap) + } + return err } // upperPath produces a file path like "{snapshotter.root}/snapshots/{id}/fs" diff --git a/snapshot/snapshot_test.go b/snapshot/snapshot_test.go index cc798a607..6ee794d5b 100644 --- a/snapshot/snapshot_test.go +++ b/snapshot/snapshot_test.go @@ -41,6 +41,7 @@ import ( "syscall" "testing" + "github.com/awslabs/soci-snapshotter/idtools" "github.com/containerd/containerd/mount" "github.com/containerd/containerd/pkg/testutil" "github.com/containerd/containerd/snapshots" @@ -406,6 +407,9 @@ func (fs *bindFs) Check(ctx context.Context, mountpoint string, labels map[strin func (fs *bindFs) Unmount(ctx context.Context, mountpoint string) error { return syscall.Unmount(mountpoint, 0) } +func (fs *bindFs) UnmountLocal(ctx context.Context, mountpoint string) error { + return errdefs.ErrNotImplemented +} func (fs *bindFs) MountLocal(ctx context.Context, mountpoint string, labels map[string]string, mounts []mount.Mount) error { if _, ok := labels[brokenLabel]; ok { @@ -417,6 +421,14 @@ func (fs *bindFs) MountLocal(ctx context.Context, mountpoint string, labels map[ return nil } +func (fs *bindFs) IdMapMount(ctx context.Context, mountpoint, activeLayerKey string, idmap idtools.IDMap) (string, error) { + 
return mountpoint, nil +} + +func (fs *bindFs) IdMapMountLocal(ctx context.Context, mountpoint, activeLayerKey string, idmap idtools.IDMap) (string, error) { + return mountpoint, nil +} + func dummyFileSystem() FileSystem { return &dummyFs{} } type dummyFs struct{} @@ -433,10 +445,22 @@ func (fs *dummyFs) Unmount(ctx context.Context, mountpoint string) error { return fmt.Errorf("dummy") } +func (fs *dummyFs) UnmountLocal(ctx context.Context, mountpoint string) error { + return fmt.Errorf("dummy") +} + func (fs *dummyFs) MountLocal(ctx context.Context, mountpoint string, labels map[string]string, mounts []mount.Mount) error { return fmt.Errorf("dummy") } +func (fs *dummyFs) IdMapMount(ctx context.Context, mountpoint, activeLayerKey string, idmap idtools.IDMap) (string, error) { + return "", fmt.Errorf("dummy") +} + +func (fs *dummyFs) IdMapMountLocal(ctx context.Context, mountpoint, activeLayerKey string, idmap idtools.IDMap) (string, error) { + return "", fmt.Errorf("dummy") +} + // ============================================================================= // Tests backword-comaptibility of overlayfs snapshotter.