From e80ad093d6a3bd905a4c3b2b60d63130cca5d3ed Mon Sep 17 00:00:00 2001
From: Brandon Duffany
Date: Wed, 27 Nov 2024 15:55:54 -0500
Subject: [PATCH] Some improvements to firecracker VFS

---
 enterprise/server/cmd/goinit/main.go          |   6 +-
 .../remote_execution/container/container.go   |   2 -
 .../containers/firecracker/BUILD              |   2 +
 .../containers/firecracker/firecracker.go     | 179 +++++++++++-------
 .../firecracker/firecracker_test.go           |  83 ++++++++
 .../remote_execution/platform/platform.go     |   4 +-
 .../server/remote_execution/runner/runner.go  |   2 -
 enterprise/tools/vmstart/vmstart.go           |   2 -
 proto/firecracker.proto                       |   1 +
 9 files changed, 204 insertions(+), 77 deletions(-)

diff --git a/enterprise/server/cmd/goinit/main.go b/enterprise/server/cmd/goinit/main.go
index 4a186d982da..6db9f6bb5a1 100644
--- a/enterprise/server/cmd/goinit/main.go
+++ b/enterprise/server/cmd/goinit/main.go
@@ -309,7 +309,9 @@ func main() {
 	}
 
 	die(mkdirp("/mnt/workspace", 0755))
-	die(mount(workspaceDevice, "/mnt/workspace", "ext4", syscall.MS_NOATIME, ""))
+	if !*enableVFS {
+		die(mount(workspaceDevice, "/mnt/workspace", "ext4", syscall.MS_NOATIME, ""))
+	}
 	die(mkdirp("/mnt/dev", 0755))
 	die(mount("/dev", "/mnt/dev", "", syscall.MS_MOVE, ""))
 
@@ -444,7 +446,7 @@ func main() {
 
 	log.Printf("Finished init in %s", time.Since(start))
 	if err := eg.Wait(); err != nil {
-		log.Errorf("Init errgroup finished with err: %s", err)
+		log.Fatalf("Child process failed: %s", err)
 	}
 
 	// Halt the system explicitly to prevent a kernel panic.
diff --git a/enterprise/server/remote_execution/container/container.go b/enterprise/server/remote_execution/container/container.go
index 5faa3318e3f..8b86f4632c8 100644
--- a/enterprise/server/remote_execution/container/container.go
+++ b/enterprise/server/remote_execution/container/container.go
@@ -426,8 +426,6 @@ type FileSystemLayout struct {
 	RemoteInstanceName string
 	DigestFunction     repb.DigestFunction_Value
 	Inputs             *repb.Tree
-	OutputDirs         []string
-	OutputFiles        []string
 }
 
 // CommandContainer provides an execution environment for commands.
diff --git a/enterprise/server/remote_execution/containers/firecracker/BUILD b/enterprise/server/remote_execution/containers/firecracker/BUILD
index 69d14746f90..6a357b5b096 100644
--- a/enterprise/server/remote_execution/containers/firecracker/BUILD
+++ b/enterprise/server/remote_execution/containers/firecracker/BUILD
@@ -144,11 +144,13 @@ go_test(
         "//enterprise/server/util/oci",
         "//proto:firecracker_go_proto",
         "//proto:remote_execution_go_proto",
+        "//proto:resource_go_proto",
         "//server/backends/disk_cache",
         "//server/interfaces",
         "//server/metrics",
         "//server/remote_cache/action_cache_server",
         "//server/remote_cache/byte_stream_server",
+        "//server/remote_cache/cachetools",
         "//server/remote_cache/content_addressable_storage_server",
         "//server/remote_cache/digest",
         "//server/resources",
diff --git a/enterprise/server/remote_execution/containers/firecracker/firecracker.go b/enterprise/server/remote_execution/containers/firecracker/firecracker.go
index c7bab707ff5..aa7162f354a 100644
--- a/enterprise/server/remote_execution/containers/firecracker/firecracker.go
+++ b/enterprise/server/remote_execution/containers/firecracker/firecracker.go
@@ -462,6 +462,7 @@ func (p *Provider) New(ctx context.Context, args *container.Init) (container.Com
 		NumCpus:           numCPUs,
 		MemSizeMb:         int64(math.Max(1.0, float64(sizeEstimate.GetEstimatedMemoryBytes())/1e6)),
 		ScratchDiskSizeMb: int64(float64(sizeEstimate.GetEstimatedFreeDiskBytes()) / 1e6),
+		EnableVfs:         platform.VFSEnabled() && platform.IsTrue(platform.FindEffectiveValue(args.Task.GetExecutionTask(), platform.EnableVFSPropertyName)),
 		EnableLogging:     platform.IsTrue(platform.FindEffectiveValue(args.Task.GetExecutionTask(), "debug-enable-vm-logs")),
 		EnableNetworking:  true,
 		InitDockerd:       args.Props.InitDockerd,
@@ -900,7 +901,9 @@ func (c *FirecrackerContainer) saveSnapshot(ctx context.Context, snapshotDetails
 			opts.ContainerFSPath = filepath.Join(c.getChroot(), containerFSName)
 			opts.ChunkedFiles[scratchDriveID] = c.scratchStore
 		}
-		opts.ChunkedFiles[workspaceDriveID] = c.workspaceStore
+		if c.fsLayout == nil {
+			opts.ChunkedFiles[workspaceDriveID] = c.workspaceStore
+		}
 	} else {
 		opts.ContainerFSPath = filepath.Join(c.getChroot(), containerFSName)
 		opts.ScratchFSPath = filepath.Join(c.getChroot(), scratchFSName)
@@ -1069,25 +1072,15 @@ func (c *FirecrackerContainer) LoadSnapshot(ctx context.Context) error {
 		return err
 	}
 
+	if err := c.startMachine(ctx, machine); err != nil {
+		return status.UnavailableErrorf("start machine: %s", err)
+	}
+
+	// Now that the machine has started, make sure the context gets canceled
+	// if it crashes while we're loading the snapshot.
 	ctx, cancel := c.monitorVMContext(ctx)
 	defer cancel()
 
-	err = (func() error {
-		_, span := tracing.StartSpan(ctx)
-		defer span.End()
-		span.SetName("StartMachine")
-		// Note: using vmCtx here, which outlives the ctx above and is not
-		// cancelled until calling Remove().
-		return machine.Start(vmCtx)
-	})()
-	if err != nil {
-		if cause := context.Cause(ctx); cause != nil {
-			return cause
-		}
-		return status.UnavailableErrorf("failed to start machine: %s", err)
-	}
-	c.machine = machine
-
 	if err := c.machine.ResumeVM(ctx); err != nil {
 		if cause := context.Cause(ctx); cause != nil {
 			return cause
@@ -1113,6 +1106,60 @@ func (c *FirecrackerContainer) LoadSnapshot(ctx context.Context) error {
 	return nil
 }
 
+func (c *FirecrackerContainer) startMachine(ctx context.Context, machine *fcclient.Machine) error {
+	_, span := tracing.StartSpan(ctx)
+	defer span.End()
+
+	vmCtx, cancelVmCtx := c.vmCtx, c.cancelVmCtx
+
+	// Cancel machine startup if the task ctx is cancelled. We can't do this by
+	// just passing ctx to machine.Start(), since the Go SDK will terminate the
+	// VM *whenever* the ctx is cancelled, even after calling machine.Start().
+	// This is not what we want - once the VM has started successfully, we want
+	// the VM to outlive the task context so that we can perform some
+	// cleanup/finalization work.
+	startupDone := make(chan struct{})
+	defer close(startupDone) // stop the cancellation watcher below once Start() has returned
+	go func() {
+		select {
+		case <-startupDone:
+		case <-ctx.Done():
+			cancelVmCtx(ctx.Err())
+		}
+	}()
+
+	if err := machine.Start(vmCtx); err != nil {
+		if cause := context.Cause(vmCtx); cause != nil {
+			return cause
+		}
+
+		return err
+	}
+
+	c.machine = machine
+
+	go func() {
+		// Cancel vmCtx once the machine exits, setting the context cause to the
+		// fatal init error if one occurred.
+		ctx := context.WithoutCancel(ctx)
+
+		waitStatus := c.machine.Wait(ctx)
+
+		if err := c.parseErrorFromVMLogTail(); err != nil {
+			cancelVmCtx(err)
+			return
+		}
+		if waitStatus != nil {
+			cancelVmCtx(fmt.Errorf("vm terminated: %w", waitStatus))
+			return
+		}
+
+		cancelVmCtx(fmt.Errorf("vm terminated"))
+	}()
+
+	return nil
+}
+
 // initScratchImage creates the empty scratch ext4 disk for the VM.
 func (c *FirecrackerContainer) initScratchImage(ctx context.Context, path string) error {
 	scratchDiskSizeBytes := ext4.MinDiskImageSizeBytes + minScratchDiskSizeBytes + c.vmConfig.ScratchDiskSizeMb*1e6
@@ -1398,7 +1444,7 @@ func getBootArgs(vmConfig *fcpb.VMConfiguration) string {
 	if *EnableRootfs {
 		initArgs = append(initArgs, "-enable_rootfs")
 	}
-	if platform.VFSEnabled() {
+	if vmConfig.EnableVfs {
 		initArgs = append(initArgs, "-enable_vfs")
 	}
 	if vmConfig.CgroupV2Only {
@@ -1467,14 +1513,16 @@ func (c *FirecrackerContainer) getConfig(ctx context.Context, rootFS, containerF
 	}
 	// Workspace drive will be /dev/vdb if merged rootfs is enabled, /dev/vdc
 	// otherwise.
-	cfg.Drives = append(cfg.Drives, []fcmodels.Drive{
-		{
-			DriveID:      fcclient.String(workspaceDriveID),
-			PathOnHost:   &workspaceFS,
-			IsRootDevice: fcclient.Bool(false),
-			IsReadOnly:   fcclient.Bool(false),
-		},
-	}...)
+	if c.fsLayout == nil {
+		cfg.Drives = append(cfg.Drives, []fcmodels.Drive{
+			{
+				DriveID:      fcclient.String(workspaceDriveID),
+				PathOnHost:   &workspaceFS,
+				IsRootDevice: fcclient.Bool(false),
+				IsReadOnly:   fcclient.Bool(false),
+			},
+		}...)
+	}
 
 	if c.vmConfig.EnableNetworking {
 		cfg.NetworkInterfaces = []fcclient.NetworkInterface{
@@ -1782,10 +1830,12 @@ func (c *FirecrackerContainer) setupVBDMounts(ctx context.Context) error {
 	} else {
 		c.scratchVBD = d
 	}
-	if d, err := setup(workspaceDriveID, c.workspaceStore); err != nil {
-		return err
-	} else {
-		c.workspaceVBD = d
+	if c.fsLayout == nil {
+		if d, err := setup(workspaceDriveID, c.workspaceStore); err != nil {
+			return err
+		} else {
+			c.workspaceVBD = d
+		}
 	}
 	return nil
 }
@@ -1810,7 +1860,7 @@ func (c *FirecrackerContainer) setupVFSServer(ctx context.Context) error {
 		return err
 	}
 	if err := c.vfsServer.Start(lis); err != nil {
-		return status.InternalErrorf("Could not start VFS server: %s", err)
+		return status.UnavailableErrorf("start VFS server: %s", err)
 	}
 	return nil
 }
@@ -1949,14 +1999,18 @@ func (c *FirecrackerContainer) create(ctx context.Context) error {
 	// Create an empty workspace image initially; the real workspace will be
 	// hot-swapped just before running each command in order to ensure that the
 	// workspace contents are up to date.
-	if err := c.createWorkspaceImage(ctx, "" /*=workspaceDir*/, workspaceFSPath); err != nil {
-		return err
+	if c.fsLayout == nil {
+		if err := c.createWorkspaceImage(ctx, "" /*=workspaceDir*/, workspaceFSPath); err != nil {
+			return err
+		}
 	}
 
 	if *enableVBD {
 		rootFSPath = filepath.Join(c.getChroot(), rootDriveID+vbdMountDirSuffix, vbd.FileName)
 		scratchFSPath = filepath.Join(c.getChroot(), scratchDriveID+vbdMountDirSuffix, vbd.FileName)
-		workspaceFSPath = filepath.Join(c.getChroot(), workspaceDriveID+vbdMountDirSuffix, vbd.FileName)
+		if c.fsLayout == nil {
+			workspaceFSPath = filepath.Join(c.getChroot(), workspaceDriveID+vbdMountDirSuffix, vbd.FileName)
+		}
 	}
 
 	if err := c.setupNetworking(ctx); err != nil {
@@ -1982,20 +2036,13 @@ func (c *FirecrackerContainer) create(ctx context.Context) error {
 	m, err := fcclient.NewMachine(vmCtx, *fcCfg, machineOpts...)
 	if err != nil {
-		return status.InternalErrorf("Failed creating machine: %s", err)
+		return status.UnavailableErrorf("create machine: %s", err)
 	}
 
 	log.CtxDebugf(ctx, "Command: %v", reflect.Indirect(reflect.Indirect(reflect.ValueOf(m)).FieldByName("cmd")).FieldByName("Args"))
 
-	err = (func() error {
-		_, span := tracing.StartSpan(ctx)
-		defer span.End()
-		span.SetName("StartMachine")
-		return m.Start(vmCtx)
-	})()
-	if err != nil {
-		return status.InternalErrorf("Failed starting machine: %s", err)
+	if err := c.startMachine(ctx, m); err != nil {
+		return status.UnavailableErrorf("start machine: %s", err)
 	}
-	c.machine = m
 
 	return nil
 }
@@ -2084,12 +2131,6 @@ func (c *FirecrackerContainer) dialVMExecServer(ctx context.Context) (*grpc.Clie
 	conn, err := vsock.SimpleGRPCDial(ctx, vsockPath, vsock.VMExecPort)
 	if err != nil {
 		if err := context.Cause(ctx); err != nil {
-			// If the context was cancelled for any reason (timed out or VM
-			// crashed), check the VM logs which might have more relevant crash
-			// info, otherwise return the context error.
-			if err := c.parseFatalInitError(); err != nil {
-				return nil, err
-			}
 			// Intentionally not returning DeadlineExceededError here since it
 			// is not a Bazel-retryable error, but this particular timeout
 			// should be retryable.
@@ -2106,10 +2147,10 @@ func (c *FirecrackerContainer) SendPrepareFileSystemRequestToGuest(ctx context.C
 	p, err := vfs_server.NewCASLazyFileProvider(c.env, ctx, c.fsLayout.RemoteInstanceName, c.fsLayout.DigestFunction, c.fsLayout.Inputs)
 	if err != nil {
-		return nil, err
+		return nil, status.WrapError(err, "initialize lazy filesystem")
 	}
 	if err := c.vfsServer.Prepare(p); err != nil {
-		return nil, err
+		return nil, status.WrapError(err, "prepare vfs server on host")
 	}
 
 	dialCtx, cancel := context.WithTimeout(ctx, vSocketDialTimeout)
@@ -2118,12 +2159,15 @@ func (c *FirecrackerContainer) SendPrepareFileSystemRequestToGuest(ctx context.C
 	vsockPath := filepath.Join(c.getChroot(), firecrackerVSockPath)
 	conn, err := vsock.SimpleGRPCDial(dialCtx, vsockPath, vsock.VMVFSPort)
 	if err != nil {
-		return nil, err
+		if cause := context.Cause(ctx); cause != nil {
+			return nil, cause
+		}
+		return nil, status.WrapError(err, "dial vfs server")
 	}
 	client := vmfspb.NewFileSystemClient(conn)
 	rsp, err := client.Prepare(ctx, req)
 	if err != nil {
-		return nil, err
+		return nil, status.WrapError(err, "prepare vfs")
 	}
 	return rsp, err
 }
@@ -2190,8 +2234,8 @@ func (c *FirecrackerContainer) Exec(ctx context.Context, cmd *repb.Command, stdi
 		}
 	} else {
 		req := &vmfspb.PrepareRequest{}
-		_, err := c.SendPrepareFileSystemRequestToGuest(ctx, req)
-		if err != nil {
+		log.CtxInfof(ctx, "Preparing guest VFS")
+		if _, err := c.SendPrepareFileSystemRequestToGuest(ctx, req); err != nil {
 			result.Error = err
 			return result
 		}
@@ -2709,21 +2753,22 @@ func (c *FirecrackerContainer) Stats(ctx context.Context) (*repb.UsageStats, err
 	return &repb.UsageStats{}, nil
 }
 
-// parseFatalInitError looks for a fatal error logged by the init binary, and
+// parseErrorFromVMLogTail looks for a fatal error logged by the init binary, and
 // returns an InternalError with the fatal error message if one is found;
 // otherwise it returns nil.
-func (c *FirecrackerContainer) parseFatalInitError() error {
+func (c *FirecrackerContainer) parseErrorFromVMLogTail() error {
 	tail := string(c.vmLog.Tail())
-	if !strings.Contains(tail, fatalInitLogPrefix) {
-		return nil
-	}
-	// Logs contain "\r\n"; convert these to universal line endings.
-	tail = strings.ReplaceAll(tail, "\r\n", "\n")
-	lines := strings.Split(tail, "\n")
-	for _, line := range lines {
-		if m := fatalErrPattern.FindStringSubmatch(line); len(m) >= 1 {
-			return status.UnavailableErrorf("Firecracker VM crashed: %s", m[1])
+	if strings.Contains(tail, fatalInitLogPrefix) {
+		// Logs contain "\r\n"; convert these to universal line endings.
+		tail = strings.ReplaceAll(tail, "\r\n", "\n")
+		lines := strings.Split(tail, "\n")
+		for _, line := range lines {
+			if m := fatalErrPattern.FindStringSubmatch(line); len(m) >= 1 {
+				return status.UnavailableErrorf("Firecracker VM crashed: %s", m[1])
+			}
 		}
+	} else if strings.Contains(tail, "Kernel panic - not syncing: Attempted to kill init! exitcode=") {
+		return status.UnavailableErrorf("kernel panic detected in guest VM")
 	}
 	return nil
 }
diff --git a/enterprise/server/remote_execution/containers/firecracker/firecracker_test.go b/enterprise/server/remote_execution/containers/firecracker/firecracker_test.go
index 52affbfeb2c..7c7ebfbc09c 100644
--- a/enterprise/server/remote_execution/containers/firecracker/firecracker_test.go
+++ b/enterprise/server/remote_execution/containers/firecracker/firecracker_test.go
@@ -33,7 +33,9 @@ import (
 	"github.com/buildbuddy-io/buildbuddy/server/metrics"
 	"github.com/buildbuddy-io/buildbuddy/server/remote_cache/action_cache_server"
 	"github.com/buildbuddy-io/buildbuddy/server/remote_cache/byte_stream_server"
+	"github.com/buildbuddy-io/buildbuddy/server/remote_cache/cachetools"
 	"github.com/buildbuddy-io/buildbuddy/server/remote_cache/content_addressable_storage_server"
+	"github.com/buildbuddy-io/buildbuddy/server/remote_cache/digest"
 	"github.com/buildbuddy-io/buildbuddy/server/resources"
 	"github.com/buildbuddy-io/buildbuddy/server/testutil/testauth"
 	"github.com/buildbuddy-io/buildbuddy/server/testutil/testdigest"
@@ -55,6 +57,7 @@ import (
 
 	fcpb "github.com/buildbuddy-io/buildbuddy/proto/firecracker"
 	repb "github.com/buildbuddy-io/buildbuddy/proto/remote_execution"
+	rspb "github.com/buildbuddy-io/buildbuddy/proto/resource"
 	bspb "google.golang.org/genproto/googleapis/bytestream"
 )
 
@@ -512,6 +515,86 @@ func TestFirecrackerSnapshotAndResume(t *testing.T) {
 	}
 }
 
+func TestVFS(t *testing.T) {
+	flags.Set(t, "executor.enable_vfs", true)
+
+	ctx := context.Background()
+	env := getTestEnv(ctx, t, envOpts{})
+	rootDir := testfs.MakeTempDir(t)
+
+	instanceName := "test-instance-name"
+	digestFunction := repb.DigestFunction_BLAKE3
+	inputDir := testfs.MakeTempDir(t)
+	testfs.WriteAllFileContents(t, inputDir, map[string]string{
+		"root_input.txt":            "hello",
+		"unused_input.txt":          "UNUSED",
+		"input_dir/child_input.txt": "world",
+	})
+	inputRootDigest, _, err := cachetools.UploadDirectoryToCAS(ctx, env, instanceName, digestFunction, inputDir)
+	require.NoError(t, err)
+	inputRootRN := digest.NewResourceName(inputRootDigest, instanceName, rspb.CacheType_CAS, digestFunction)
+	inputTree, err := cachetools.GetTreeFromRootDirectoryDigest(ctx, env.GetContentAddressableStorageClient(), inputRootRN)
+	require.NoError(t, err)
+
+	cmd := &repb.Command{
+		Arguments: []string{"sh", "-ec", `
+			cp root_input.txt root_output.txt
+			printf 'ooo' >> root_output.txt
+
+			mkdir output_dir/child_dir/
+			cp input_dir/child_input.txt output_dir/child_dir/child_output.txt
+			printf '!!!' >> output_dir/child_dir/child_output.txt
+		`},
+		Platform: &repb.Platform{
+			Properties: []*repb.Platform_Property{
+				{Name: "enable-vfs", Value: "true"},
+			},
+		},
+	}
+
+	workDir := testfs.MakeDirAll(t, rootDir, "work")
+	// Output directories are expected to be pre-created by the executor.
+	testfs.MakeDirAll(t, workDir, "output_dir")
+
+	opts := firecracker.ContainerOpts{
+		ContainerImage:         busyboxImage,
+		ActionWorkingDirectory: workDir,
+		VMConfiguration: &fcpb.VMConfiguration{
+			NumCpus:           1,
+			MemSizeMb:         2500,
+			EnableNetworking:  false,
+			ScratchDiskSizeMb: 100,
+			EnableVfs:         true,
+		},
+		ExecutorConfig: getExecutorConfig(t),
+	}
+	c, err := firecracker.NewContainer(ctx, env, &repb.ExecutionTask{
+		Command: cmd,
+	}, opts)
+	require.NoError(t, err)
+
+	fsLayout := &container.FileSystemLayout{
+		RemoteInstanceName: instanceName,
+		DigestFunction:     digestFunction,
+		Inputs:             inputTree,
+	}
+	c.SetTaskFileSystemLayout(fsLayout)
+
+	res := c.Run(ctx, cmd, opts.ActionWorkingDirectory, oci.Credentials{})
+	require.NoError(t, res.Error)
+
+	assertCommandResult(t, &interfaces.CommandResult{ExitCode: 0}, res)
+	testfs.AssertExactFileContents(t, workDir, map[string]string{
+		// Only inputs that were accessed should have been downloaded to the
+		// workspace.
+		"root_input.txt":            "hello",
+		"input_dir/child_input.txt": "world",
+		// Outputs should be present in the workspace.
+		"root_output.txt":                       "helloooo",
+		"output_dir/child_dir/child_output.txt": "world!!!",
+	})
+}
+
 func TestFirecracker_LocalSnapshotSharing(t *testing.T) {
 	if !*snaputil.EnableLocalSnapshotSharing {
 		t.Skip("Snapshot sharing is not enabled")
diff --git a/enterprise/server/remote_execution/platform/platform.go b/enterprise/server/remote_execution/platform/platform.go
index bc819b1a64c..1d5f8b6ed52 100644
--- a/enterprise/server/remote_execution/platform/platform.go
+++ b/enterprise/server/remote_execution/platform/platform.go
@@ -85,7 +85,7 @@ const (
 	workloadIsolationPropertyName       = "workload-isolation-type"
 	initDockerdPropertyName             = "init-dockerd"
 	enableDockerdTCPPropertyName        = "enable-dockerd-tcp"
-	enableVFSPropertyName               = "enable-vfs"
+	EnableVFSPropertyName               = "enable-vfs"
 	HostedBazelAffinityKeyPropertyName  = "hosted-bazel-affinity-key"
 	useSelfHostedExecutorsPropertyName  = "use-self-hosted-executors"
 	disableMeasuredTaskSizePropertyName = "debug-disable-measured-task-size"
@@ -275,7 +275,7 @@ func ParseProperties(task *repb.ExecutionTask) (*Properties, error) {
 	}
 
 	// Only Enable VFS if it is also enabled via flags
-	vfsEnabled := boolProp(m, enableVFSPropertyName, false) && *enableVFS
+	vfsEnabled := boolProp(m, EnableVFSPropertyName, false) && *enableVFS
 
 	envOverrides := stringListProp(m, EnvOverridesPropertyName)
 	for _, prop := range stringListProp(m, EnvOverridesBase64PropertyName) {
diff --git a/enterprise/server/remote_execution/runner/runner.go b/enterprise/server/remote_execution/runner/runner.go
index db760e903d0..3438a92f83b 100644
--- a/enterprise/server/remote_execution/runner/runner.go
+++ b/enterprise/server/remote_execution/runner/runner.go
@@ -266,8 +266,6 @@ func (r *taskRunner) DownloadInputs(ctx context.Context, ioStats *repb.IOStats)
 		RemoteInstanceName: r.task.GetExecuteRequest().GetInstanceName(),
 		DigestFunction:     r.task.GetExecuteRequest().GetDigestFunction(),
 		Inputs:             inputTree,
-		OutputDirs:         r.task.GetCommand().GetOutputDirectories(),
-		OutputFiles:        r.task.GetCommand().GetOutputFiles(),
 	}
 
 	if err := r.prepareVFS(ctx, layout); err != nil {
diff --git a/enterprise/tools/vmstart/vmstart.go b/enterprise/tools/vmstart/vmstart.go
index dd715a48d9e..57b4f33d047 100644
--- a/enterprise/tools/vmstart/vmstart.go
+++ b/enterprise/tools/vmstart/vmstart.go
@@ -324,8 +324,6 @@ func run(ctx context.Context, env environment.Env) error {
 	c.SetTaskFileSystemLayout(&container.FileSystemLayout{
 		RemoteInstanceName: *remoteInstanceName,
 		Inputs:             tree,
-		OutputDirs:         cmd.GetOutputDirectories(),
-		OutputFiles:        cmd.GetOutputFiles(),
 	})
 
 	_, err = c.SendPrepareFileSystemRequestToGuest(ctx, &vmfspb.PrepareRequest{})
diff --git a/proto/firecracker.proto b/proto/firecracker.proto
index ae080368210..95b1eb349de 100644
--- a/proto/firecracker.proto
+++ b/proto/firecracker.proto
@@ -15,6 +15,7 @@ message VMConfiguration {
   int64 num_cpus = 1;
   int64 mem_size_mb = 2;
   int64 scratch_disk_size_mb = 3;
+  bool enable_vfs = 15;
   bool enable_networking = 4;
   bool init_dockerd = 5;
   bool debug_mode = 6;
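
Usage note: with this change, a task only gets the VFS-backed workspace when the executor has VFS enabled (the new TestVFS test flips this via the executor.enable_vfs flag) and the task itself sets the enable-vfs platform property (platform.EnableVFSPropertyName); Provider.New combines the two when populating VMConfiguration.EnableVfs. A minimal sketch of the task-side opt-in, mirroring the Platform block used in TestVFS above (the echo command is just a placeholder):

	cmd := &repb.Command{
		Arguments: []string{"sh", "-c", "echo hello"},
		Platform: &repb.Platform{
			Properties: []*repb.Platform_Property{
				// Request the VFS-backed workspace; this only takes effect when
				// the executor is also started with VFS enabled.
				{Name: "enable-vfs", Value: "true"},
			},
		},
	}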