diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index eff2f94c9..babeec8c4 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -26,7 +26,7 @@ jobs: strategy: fail-fast: false matrix: - module: [taps, misc, profile, networking] + module: [taps, misc, profile, networking, snapshotting] steps: - name: Set up Go 1.19 diff --git a/cri/firecracker/coordinator.go b/cri/firecracker/coordinator.go index 12dc0d518..4aa46a8ab 100644 --- a/cri/firecracker/coordinator.go +++ b/cri/firecracker/coordinator.go @@ -25,6 +25,7 @@ package firecracker import ( "context" "errors" + "github.com/vhive-serverless/vhive/snapshotting" "strconv" "sync" "sync/atomic" @@ -40,7 +41,7 @@ type coordinator struct { nextID uint64 activeInstances map[string]*funcInstance - idleInstances map[string][]*funcInstance + snapshotManager *snapshotting.SnapshotManager withoutOrchestrator bool } @@ -56,7 +57,6 @@ func withoutOrchestrator() coordinatorOption { func newFirecrackerCoordinator(orch *ctriface.Orchestrator, opts ...coordinatorOption) *coordinator { c := &coordinator{ activeInstances: make(map[string]*funcInstance), - idleInstances: make(map[string][]*funcInstance), orch: orch, } @@ -64,38 +64,13 @@ func newFirecrackerCoordinator(orch *ctriface.Orchestrator, opts ...coordinatorO opt(c) } - return c -} - -func (c *coordinator) getIdleInstance(image string) *funcInstance { - c.Lock() - defer c.Unlock() - - idles, ok := c.idleInstances[image] - if !ok { - c.idleInstances[image] = []*funcInstance{} - return nil - } - - if len(idles) != 0 { - fi := idles[0] - c.idleInstances[image] = idles[1:] - return fi - } - - return nil -} - -func (c *coordinator) setIdleInstance(fi *funcInstance) { - c.Lock() - defer c.Unlock() - - _, ok := c.idleInstances[fi.Image] - if !ok { - c.idleInstances[fi.Image] = []*funcInstance{} + snapshotsDir := "/fccd/test/snapshots" + if !c.withoutOrchestrator { + snapshotsDir = orch.GetSnapshotsDir() } + c.snapshotManager = snapshotting.NewSnapshotManager(snapshotsDir) - c.idleInstances[fi.Image] = append(c.idleInstances[fi.Image], fi) + return c } func (c *coordinator) startVM(ctx context.Context, image string) (*funcInstance, error) { @@ -103,9 +78,11 @@ func (c *coordinator) startVM(ctx context.Context, image string) (*funcInstance, } func (c *coordinator) startVMWithEnvironment(ctx context.Context, image string, environment []string) (*funcInstance, error) { - if fi := c.getIdleInstance(image); c.orch != nil && c.orch.GetSnapshotsEnabled() && fi != nil { - err := c.orchLoadInstance(ctx, fi) - return fi, err + if c.orch != nil && c.orch.GetSnapshotsEnabled() { + // Check if snapshot is available + if snap, err := c.snapshotManager.AcquireSnapshot(image); err == nil { + return c.orchLoadInstance(ctx, snap) + } } return c.orchStartVM(ctx, image, environment) @@ -134,7 +111,6 @@ func (c *coordinator) stopVM(ctx context.Context, containerID string) error { func (c *coordinator) isActive(containerID string) bool { c.Lock() defer c.Unlock() - _, ok := c.activeInstances[containerID] return ok } @@ -185,29 +161,44 @@ func (c *coordinator) orchStartVM(ctx context.Context, image string, envVariable return fi, err } -func (c *coordinator) orchLoadInstance(ctx context.Context, fi *funcInstance) error { - fi.Logger.Debug("found idle instance to load") +func (c *coordinator) orchLoadInstance(ctx context.Context, snap *snapshotting.Snapshot) (*funcInstance, error) { + logger := log.WithFields( + log.Fields{ + "vmID": snap.GetId(), + "image": snap.GetImage(), + }, + ) + + logger.Debug("found idle instance to load") ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*30) defer cancel() - if _, err := c.orch.LoadSnapshot(ctxTimeout, fi.VmID); err != nil { - fi.Logger.WithError(err).Error("failed to load VM") - return err + resp, _, err := c.orch.LoadSnapshot(ctxTimeout, snap.GetId(), snap) + if err != nil { + logger.WithError(err).Error("failed to load VM") + return nil, err } - if _, err := c.orch.ResumeVM(ctxTimeout, fi.VmID); err != nil { - fi.Logger.WithError(err).Error("failed to load VM") - return err + if _, err := c.orch.ResumeVM(ctxTimeout, snap.GetId()); err != nil { + logger.WithError(err).Error("failed to load VM") + return nil, err } - fi.Logger.Debug("successfully loaded idle instance") - return nil + fi := newFuncInstance(snap.GetId(), snap.GetImage(), resp) + logger.Debug("successfully loaded idle instance") + return fi, nil } func (c *coordinator) orchCreateSnapshot(ctx context.Context, fi *funcInstance) error { var err error + snap, err := c.snapshotManager.InitSnapshot(fi.VmID, fi.Image) + if err != nil { + fi.Logger.WithError(err).Error("failed to initialize snapshot") + return nil + } + fi.OnceCreateSnapInstance.Do( func() { ctxTimeout, cancel := context.WithTimeout(ctx, time.Second*60) @@ -221,7 +212,7 @@ func (c *coordinator) orchCreateSnapshot(ctx context.Context, fi *funcInstance) return } - err = c.orch.CreateSnapshot(ctxTimeout, fi.VmID) + err = c.orch.CreateSnapshot(ctxTimeout, fi.VmID, snap) if err != nil { fi.Logger.WithError(err).Error("failed to create snapshot") return @@ -229,7 +220,12 @@ func (c *coordinator) orchCreateSnapshot(ctx context.Context, fi *funcInstance) }, ) - return err + if err := c.snapshotManager.CommitSnapshot(fi.VmID); err != nil { + fi.Logger.WithError(err).Error("failed to commit snapshot") + return err + } + + return nil } func (c *coordinator) orchOffloadInstance(ctx context.Context, fi *funcInstance) error { @@ -246,8 +242,6 @@ func (c *coordinator) orchOffloadInstance(ctx context.Context, fi *funcInstance) fi.Logger.WithError(err).Error("failed to offload instance") } - c.setIdleInstance(fi) - return nil } diff --git a/ctriface/failing_test.go b/ctriface/failing_test.go index 0f07caecb..8cb19ae80 100644 --- a/ctriface/failing_test.go +++ b/ctriface/failing_test.go @@ -28,6 +28,7 @@ import ( "testing" "time" + "github.com/vhive-serverless/vhive/snapshotting" ctrdlog "github.com/containerd/containerd/log" "github.com/containerd/containerd/namespaces" log "github.com/sirupsen/logrus" @@ -61,13 +62,14 @@ func TestStartSnapStop(t *testing.T) { err = orch.PauseVM(ctx, vmID) require.NoError(t, err, "Failed to pause VM") - err = orch.CreateSnapshot(ctx, vmID) + snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) + err = orch.CreateSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to create snapshot of VM") err = orch.Offload(ctx, vmID) require.NoError(t, err, "Failed to offload VM") - _, err = orch.LoadSnapshot(ctx, vmID) + _, _, err = orch.LoadSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) diff --git a/ctriface/iface.go b/ctriface/iface.go index da4623cd3..e69b6f0ef 100644 --- a/ctriface/iface.go +++ b/ctriface/iface.go @@ -24,6 +24,11 @@ package ctriface import ( "context" + "fmt" + "github.com/vhive-serverless/vhive/snapshotting" + "net" + "net/http" + "net/url" "os" "os/exec" "strings" @@ -392,7 +397,7 @@ func (o *Orchestrator) ResumeVM(ctx context.Context, vmID string) (*metrics.Metr } // CreateSnapshot Creates a snapshot of a VM -func (o *Orchestrator) CreateSnapshot(ctx context.Context, vmID string) error { +func (o *Orchestrator) CreateSnapshot(ctx context.Context, vmID string, snap *snapshotting.Snapshot) error { logger := log.WithFields(log.Fields{"vmID": vmID}) logger.Debug("Orchestrator received CreateSnapshot") @@ -400,8 +405,8 @@ func (o *Orchestrator) CreateSnapshot(ctx context.Context, vmID string) error { req := &proto.CreateSnapshotRequest{ VMID: vmID, - SnapshotFilePath: o.getSnapshotFile(vmID), - MemFilePath: o.getMemoryFile(vmID), + SnapshotFilePath: snap.GetSnapshotFilePath(), + MemFilePath: snap.GetMemFilePath(), } if _, err := o.fcClient.CreateSnapshot(ctx, req); err != nil { @@ -413,7 +418,7 @@ func (o *Orchestrator) CreateSnapshot(ctx context.Context, vmID string) error { } // LoadSnapshot Loads a snapshot of a VM -func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string) (*metrics.Metric, error) { +func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string, snap *snapshotting.Snapshot) (*StartVMResponse, *metrics.Metric, error) { var ( loadSnapshotMetric *metrics.Metric = metrics.NewMetric() tStart time.Time @@ -424,18 +429,26 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string) (*metrics. logger := log.WithFields(log.Fields{"vmID": vmID}) logger.Debug("Orchestrator received LoadSnapshot") + vm, err := o.vmPool.GetVM(vmID) + if err != nil { + if _, ok := err.(*misc.NonExistErr); ok { + logger.Panic("LoadSnapshot: VM does not exist") + } + logger.Panic("LoadSnapshot: GetVM() failed for an unknown reason") + } + ctx = namespaces.WithNamespace(ctx, namespaceName) req := &proto.LoadSnapshotRequest{ VMID: vmID, - SnapshotFilePath: o.getSnapshotFile(vmID), - MemFilePath: o.getMemoryFile(vmID), + SnapshotFilePath: snap.GetSnapshotFilePath(), + MemFilePath: snap.GetMemFilePath(), EnableUserPF: o.GetUPFEnabled(), } if o.GetUPFEnabled() { if err := o.memoryManager.FetchState(vmID); err != nil { - return nil, err + return nil, nil, err } } @@ -461,10 +474,10 @@ func (o *Orchestrator) LoadSnapshot(ctx context.Context, vmID string) (*metrics. if loadErr != nil || activateErr != nil { multierr := multierror.Of(loadErr, activateErr) - return nil, multierr + return nil, nil, multierr } - return loadSnapshotMetric, nil + return &StartVMResponse{GuestIP: vm.Ni.PrimaryAddress}, loadSnapshotMetric, nil } // Offload Shuts down the VM but leaves shim and other resources running. diff --git a/ctriface/iface_test.go b/ctriface/iface_test.go index cda2e97a0..34d26feee 100644 --- a/ctriface/iface_test.go +++ b/ctriface/iface_test.go @@ -30,6 +30,7 @@ import ( "testing" "time" + "github.com/vhive-serverless/vhive/snapshotting" ctrdlog "github.com/containerd/containerd/log" "github.com/containerd/containerd/namespaces" log "github.com/sirupsen/logrus" @@ -75,7 +76,8 @@ func TestPauseSnapResume(t *testing.T) { err = orch.PauseVM(ctx, vmID) require.NoError(t, err, "Failed to pause VM") - err = orch.CreateSnapshot(ctx, vmID) + snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) + err = orch.CreateSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to create snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) diff --git a/ctriface/manual_cleanup_test.go b/ctriface/manual_cleanup_test.go index c4c4fd33a..1b4c92407 100644 --- a/ctriface/manual_cleanup_test.go +++ b/ctriface/manual_cleanup_test.go @@ -30,6 +30,7 @@ import ( "testing" "time" + "github.com/vhive-serverless/vhive/snapshotting" ctrdlog "github.com/containerd/containerd/log" "github.com/containerd/containerd/namespaces" log "github.com/sirupsen/logrus" @@ -69,7 +70,8 @@ func TestSnapLoad(t *testing.T) { err = orch.PauseVM(ctx, vmID) require.NoError(t, err, "Failed to pause VM") - err = orch.CreateSnapshot(ctx, vmID) + snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) + err = orch.CreateSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to create snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -78,7 +80,7 @@ func TestSnapLoad(t *testing.T) { err = orch.Offload(ctx, vmID) require.NoError(t, err, "Failed to offload VM") - _, err = orch.LoadSnapshot(ctx, vmID) + _, _, err = orch.LoadSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -119,13 +121,14 @@ func TestSnapLoadMultiple(t *testing.T) { err = orch.PauseVM(ctx, vmID) require.NoError(t, err, "Failed to pause VM") - err = orch.CreateSnapshot(ctx, vmID) + snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) + err = orch.CreateSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to create snapshot of VM") err = orch.Offload(ctx, vmID) require.NoError(t, err, "Failed to offload VM") - _, err = orch.LoadSnapshot(ctx, vmID) + _, _, err = orch.LoadSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -134,7 +137,7 @@ func TestSnapLoadMultiple(t *testing.T) { err = orch.Offload(ctx, vmID) require.NoError(t, err, "Failed to offload VM") - _, err = orch.LoadSnapshot(ctx, vmID) + _, _, err = orch.LoadSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM") _, err = orch.ResumeVM(ctx, vmID) @@ -190,13 +193,14 @@ func TestParallelSnapLoad(t *testing.T) { err = orch.PauseVM(ctx, vmID) require.NoError(t, err, "Failed to pause VM, "+vmID) - err = orch.CreateSnapshot(ctx, vmID) + snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) + err = orch.CreateSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to create snapshot of VM, "+vmID) err = orch.Offload(ctx, vmID) require.NoError(t, err, "Failed to offload VM, "+vmID) - _, err = orch.LoadSnapshot(ctx, vmID) + _, _, err = orch.LoadSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM, "+vmID) _, err = orch.ResumeVM(ctx, vmID) @@ -274,7 +278,8 @@ func TestParallelPhasedSnapLoad(t *testing.T) { go func(i int) { defer vmGroup.Done() vmID := fmt.Sprintf("%d", i+vmIDBase) - err := orch.CreateSnapshot(ctx, vmID) + snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) + err := orch.CreateSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to create snapshot of VM, "+vmID) }(i) } @@ -302,7 +307,8 @@ func TestParallelPhasedSnapLoad(t *testing.T) { go func(i int) { defer vmGroup.Done() vmID := fmt.Sprintf("%d", i+vmIDBase) - _, err := orch.LoadSnapshot(ctx, vmID) + snap := snapshotting.NewSnapshot(vmID, "/fccd/snapshots", testImageName) + _, _, err := orch.LoadSnapshot(ctx, vmID, snap) require.NoError(t, err, "Failed to load snapshot of VM, "+vmID) }(i) } diff --git a/ctriface/orch.go b/ctriface/orch.go index 1de993a07..f1694b4e1 100644 --- a/ctriface/orch.go +++ b/ctriface/orch.go @@ -199,6 +199,11 @@ func (o *Orchestrator) GetUPFLatencyStats(vmID string) ([]*metrics.Metric, error return o.memoryManager.GetUPFLatencyStats(vmID) } +// GetSnapshotsDir Returns the orchestrator's snapshot directory +func (o *Orchestrator) GetSnapshotsDir() string { + return o.snapshotsDir +} + func (o *Orchestrator) getSnapshotFile(vmID string) string { return filepath.Join(o.getVMBaseDir(vmID), "snap_file") } diff --git a/functions.go b/functions.go index 9c3359f37..c7282dded 100644 --- a/functions.go +++ b/functions.go @@ -42,6 +42,7 @@ import ( hpb "github.com/vhive-serverless/vhive/examples/protobuf/helloworld" "github.com/vhive-serverless/vhive/metrics" + "github.com/vhive-serverless/vhive/snapshotting" "github.com/pkg/errors" log "github.com/sirupsen/logrus" ) @@ -450,7 +451,8 @@ func (f *Function) CreateInstanceSnapshot() { log.Panic(err) } - err = orch.CreateSnapshot(ctx, f.vmID) + snap := snapshotting.NewSnapshot(f.vmID, "/fccd/snapshots", f.imageName) + err = orch.CreateSnapshot(ctx, f.vmID, snap) if err != nil { log.Panic(err) } @@ -487,7 +489,8 @@ func (f *Function) LoadInstance() *metrics.Metric { ctx, cancel := context.WithTimeout(context.Background(), time.Second*60) defer cancel() - loadMetr, err := orch.LoadSnapshot(ctx, f.vmID) + snap := snapshotting.NewSnapshot(f.vmID, "/fccd/snapshots", f.imageName) + _, loadMetr, err := orch.LoadSnapshot(ctx, f.vmID, snap) if err != nil { log.Panic(err) } diff --git a/snapshotting/Makefile b/snapshotting/Makefile new file mode 100644 index 000000000..6d60adc8c --- /dev/null +++ b/snapshotting/Makefile @@ -0,0 +1,33 @@ +# MIT License +# +# Copyright (c) 2023 Georgiy Lebedev, Dmitrii Ustiugov, Plamen Petrov and vHive team +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +EXTRAGOARGS:=-v -race -cover + +test: + # Need to pass GOROOT because GitHub-hosted runners may have several + # go versions installed so that calling go from root may fail + sudo env "PATH=$(PATH)" "GOROOT=$(GOROOT)" go test ./ $(EXTRAGOARGS) + +test-man: + echo "Nothing to test manually" + +.PHONY: test test-man diff --git a/snapshotting/manager.go b/snapshotting/manager.go new file mode 100644 index 000000000..83f9090ac --- /dev/null +++ b/snapshotting/manager.go @@ -0,0 +1,145 @@ +// MIT License +// +// Copyright (c) 2023 Georgiy Lebedev, Amory Hoste and vHive team +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package snapshotting + +import ( + "fmt" + "github.com/pkg/errors" + "os" + "sync" +) + +// SnapshotManager manages snapshots stored on the node. Each snapshot can only be used by a single VM at +// a time and thus is always in one of three states: creating, active or idle. +type SnapshotManager struct { + sync.Mutex + // Snapshots currently in use by a function (identified by the id of the VM using the snapshot) + activeSnapshots map[string]*Snapshot + // Snapshots currently being created (identified by the id of the VM the snapshot is being created for) + creatingSnapshots map[string]*Snapshot + // Offloaded snapshots available for reuse by new VMs (identified by the image name of the snapshot) + idleSnapshots map[string][]*Snapshot + baseFolder string +} + +// Snapshot identified by VM id + +func NewSnapshotManager(baseFolder string) *SnapshotManager { + manager := new(SnapshotManager) + manager.activeSnapshots = make(map[string]*Snapshot) + manager.creatingSnapshots = make(map[string]*Snapshot) + manager.idleSnapshots = make(map[string][]*Snapshot) + manager.baseFolder = baseFolder + + // Clean & init basefolder + _ = os.RemoveAll(manager.baseFolder) + _ = os.MkdirAll(manager.baseFolder, os.ModePerm) + + return manager +} + +// AcquireSnapshot returns an idle snapshot if one is available for the given image +func (mgr *SnapshotManager) AcquireSnapshot(image string) (*Snapshot, error) { + mgr.Lock() + defer mgr.Unlock() + + // Check if idle snapshot is available for the given image + idles, ok := mgr.idleSnapshots[image] + if !ok { + mgr.idleSnapshots[image] = []*Snapshot{} + return nil, errors.New(fmt.Sprintf("There is no snapshot available for image %s", image)) + } + + // Return snapshot for supplied image + if len(idles) != 0 { + snp := idles[0] + mgr.idleSnapshots[image] = idles[1:] + mgr.activeSnapshots[snp.GetId()] = snp + return snp, nil + } + return nil, errors.New(fmt.Sprintf("There is no snapshot available fo rimage %s", image)) +} + +// ReleaseSnapshot releases the snapshot in use by the given VM for offloading so that it can get used to handle a new +// VM creation. +func (mgr *SnapshotManager) ReleaseSnapshot(vmID string) error { + mgr.Lock() + defer mgr.Unlock() + + snap, present := mgr.activeSnapshots[vmID] + if !present { + return errors.New(fmt.Sprintf("Get: Snapshot for container %s does not exist", vmID)) + } + + // Move snapshot from active to idle state + delete(mgr.activeSnapshots, vmID) + mgr.idleSnapshots[snap.Image] = append(mgr.idleSnapshots[snap.Image], snap) + + return nil +} + +// InitSnapshot initializes a snapshot by initializing a new snapshot and moving it to the creating state. CommitSnapshot +// must be run to finalize the snapshot creation and make the snapshot available for use +func (mgr *SnapshotManager) InitSnapshot(vmID, image string) (*Snapshot, error) { + mgr.Lock() + + if _, present := mgr.creatingSnapshots[vmID]; present { + mgr.Unlock() + return nil, errors.New(fmt.Sprintf("Add: Snapshot for vm %s already exists", vmID)) + } + + // Create snapshot object and move into creating state + snap := NewSnapshot(vmID, mgr.baseFolder, image) + mgr.creatingSnapshots[snap.GetId()] = snap + mgr.Unlock() + + // Create directory to store snapshot data + err := snap.CreateSnapDir() + if err != nil { + return nil, errors.Wrapf(err, "creating snapDir for snapshots %s", vmID) + } + + return snap, nil +} + +// CommitSnapshot finalizes the snapshot creation and makes it available for use by moving it into the idle state. +func (mgr *SnapshotManager) CommitSnapshot(vmID string) error { + mgr.Lock() + defer mgr.Unlock() + + // Move snapshot from creating to idle state + snap, ok := mgr.creatingSnapshots[vmID] + if !ok { + return errors.New(fmt.Sprintf("There has no snapshot been created with vmID %s", vmID)) + } + delete(mgr.creatingSnapshots, vmID) + + _, ok = mgr.idleSnapshots[snap.Image] + if !ok { + mgr.idleSnapshots[snap.Image] = []*Snapshot{} + } + + mgr.idleSnapshots[snap.Image] = append(mgr.idleSnapshots[snap.Image], snap) + + return nil +} diff --git a/snapshotting/manager_test.go b/snapshotting/manager_test.go new file mode 100644 index 000000000..ddb2f9a19 --- /dev/null +++ b/snapshotting/manager_test.go @@ -0,0 +1,105 @@ +// MIT License +// +// Copyright (c) 2020 Plamen Petrov, Amory Hoste and EASE lab +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package snapshotting_test + +import ( + "fmt" + ctrdlog "github.com/containerd/containerd/log" + log "github.com/sirupsen/logrus" + "github.com/stretchr/testify/require" + "github.com/vhive-serverless/vhive/snapshotting" + "os" + "sync" + "testing" +) + +const snapshotsDir = "/fccd/test/snapshots" + +func TestMain(m *testing.M) { + // call flag.Parse() here if TestMain uses flags + + log.SetFormatter(&log.TextFormatter{ + TimestampFormat: ctrdlog.RFC3339NanoFixed, + FullTimestamp: true, + }) + + log.SetOutput(os.Stdout) + + log.SetLevel(log.InfoLevel) + + os.Exit(m.Run()) +} + +func testSnapshotManager(t *testing.T, mgr *snapshotting.SnapshotManager, vmId, imageName string) { + // Create snapshot + snap, err := mgr.InitSnapshot(vmId, imageName) + require.NoError(t, err, fmt.Sprintf("Failed to create snapshot for %s", vmId)) + _, err = mgr.InitSnapshot(vmId, imageName) + require.Error(t, err, fmt.Sprintf("Init should fail when a snapshot has already been created for %s", vmId)) + + err = mgr.CommitSnapshot(snap.GetId()) + require.NoError(t, err, fmt.Sprintf("Failed to commit snapshot for %s", vmId)) + err = mgr.CommitSnapshot(snap.GetId()) + require.Error(t, err, fmt.Sprintf("Commit should fail when no snapshots are created for %s", vmId)) + + // Use snapshot + snp, err := mgr.AcquireSnapshot(imageName) + require.NoError(t, err, fmt.Sprintf("Failed to acquire snapshot for %s", imageName)) + _, err = mgr.AcquireSnapshot(imageName) + require.Error(t, err, fmt.Sprintf("Acquire should fail when no snapshots are available for %s", imageName)) + + // Release snapshot + err = mgr.ReleaseSnapshot(snp.GetId()) + require.NoError(t, err, fmt.Sprintf("Failed to release snapshot for %s", imageName)) + err = mgr.ReleaseSnapshot(snp.GetId()) + require.Error(t, err, fmt.Sprintf("Release should fail when there are no active snapshots for %s", vmId)) +} + +func TestSnapshotManagerSingle(t *testing.T) { + // Create snapshot manager + mgr := snapshotting.NewSnapshotManager(snapshotsDir) + + vmId := "uvm1" // Snap id = vmId + imageName := "testImage" + + testSnapshotManager(t, mgr, vmId, imageName) +} + +func TestSnapshotManagerConcurrent(t *testing.T) { + // Create snapshot manager + mgr := snapshotting.NewSnapshotManager(snapshotsDir) + + var wg sync.WaitGroup + concurrency := 20 + wg.Add(concurrency) + + for i := 0; i < concurrency; i++ { + vmId := fmt.Sprintf("uvm%d", i) + imageName := fmt.Sprintf("testImage-%d", i) + go func(vmId, imageName string) { + defer wg.Done() + testSnapshotManager(t, mgr, vmId, imageName) + }(vmId, imageName) + } + wg.Wait() +} diff --git a/snapshotting/snapshot.go b/snapshotting/snapshot.go new file mode 100644 index 000000000..09c47e853 --- /dev/null +++ b/snapshotting/snapshot.go @@ -0,0 +1,127 @@ +// MIT License +// +// Copyright (c) 2023 Georgiy Lebedev, Amory Hoste and vHive team +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + +package snapshotting + +import ( + "encoding/gob" + "fmt" + "github.com/pkg/errors" + "os" + "path/filepath" + "time" +) + +// Snapshot identified by revision +// Only capitalized fields are serialised / deserialised +type Snapshot struct { + id string + ContainerSnapName string + snapDir string + Image string +} + +func NewSnapshot(id, baseFolder, image string) *Snapshot { + s := &Snapshot{ + id: id, + snapDir: filepath.Join(baseFolder, id), + ContainerSnapName: fmt.Sprintf("%s%s", id, time.Now().Format("20060102150405")), + Image: image, + } + + return s +} + +func (snp *Snapshot) CreateSnapDir() error { + err := os.Mkdir(snp.snapDir, 0755) + if err != nil && os.IsExist(err) { + return nil + } + return err +} + +func (snp *Snapshot) GetImage() string { + return snp.Image +} + +func (snp *Snapshot) GetId() string { + return snp.id +} + +func (snp *Snapshot) GetContainerSnapName() string { + return snp.ContainerSnapName +} + +func (snp *Snapshot) GetSnapshotFilePath() string { + return filepath.Join(snp.snapDir, "snap_file") +} + +func (snp *Snapshot) GetMemFilePath() string { + return filepath.Join(snp.snapDir, "mem_file") +} + +func (snp *Snapshot) GetPatchFilePath() string { + return filepath.Join(snp.snapDir, "patch_file") +} + +func (snp *Snapshot) GetInfoFilePath() string { + return filepath.Join(snp.snapDir, "info_file") +} + +// SerializeSnapInfo serializes the snapshot info using gob. This can be useful for remote snapshots +func (snp *Snapshot) SerializeSnapInfo() error { + file, err := os.Create(snp.GetInfoFilePath()) + if err != nil { + return errors.Wrapf(err, "failed to create snapinfo file") + } + defer file.Close() + + encoder := gob.NewEncoder(file) + + err = encoder.Encode(*snp) + if err != nil { + return errors.Wrapf(err, "failed to encode snapinfo") + } + return nil +} + +// LoadSnapInfo loads the snapshot info from a file. This can be useful for remote snapshots. +func (snp *Snapshot) LoadSnapInfo(infoPath string) error { + file, err := os.Open(infoPath) + if err != nil { + return errors.Wrapf(err, "failed to open snapinfo file") + } + defer file.Close() + + encoder := gob.NewDecoder(file) + + err = encoder.Decode(snp) + if err != nil { + return errors.Wrapf(err, "failed to decode snapinfo") + } + + return nil +} + +func (snp *Snapshot) Cleanup() error { + return os.RemoveAll(snp.snapDir) +}