Skip to content

Commit

Permalink
ci: make tests more reliable and debuggable (#759)
Browse files Browse the repository at this point in the history
Issue #, if available: The most common case for macOS tests failing
seems to be vm init or vm start failing (or, partially failing), see:
https://github.com/runfinch/finch/actions/runs/7493200344/job/20398390952,
https://github.com/runfinch/finch/actions/runs/7493150724/job/20398225275,
https://github.com/runfinch/finch/actions/runs/7493910137/job/20400678411

*Description of changes:*
- Adds a delay between vm state changes. I'm not sure if this will fix
the issue, but its worth a try

*Testing done:*



- [x] I've reviewed the guidance in CONTRIBUTING.md


#### License Acceptance

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.

---------

Signed-off-by: Justin Alvarez <[email protected]>
  • Loading branch information
pendo324 authored Jan 22, 2024
1 parent ec9d697 commit 673c2a5
Show file tree
Hide file tree
Showing 23 changed files with 126 additions and 65 deletions.
20 changes: 14 additions & 6 deletions .github/workflows/ci-docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,22 @@ jobs:
e2e-tests:
strategy:
matrix:
os:
os:
[
[self-hosted, macos, amd64, 13, test],
[self-hosted, macos, amd64, 14, test],
[self-hosted, macos, arm64, 13, test],
[self-hosted, macos, arm64, 14, test]
[self-hosted, macos, amd64, 13, test],
[self-hosted, macos, amd64, 14, test],
[self-hosted, macos, arm64, 13, test],
[self-hosted, macos, arm64, 14, test],
]
runs-on: ${{ matrix.os }}
runs-on: ${{ matrix.os }}
steps:
- run: echo "Skipping CI for docs & contrib files"
windows-e2e-tests:
strategy:
matrix:
os: [[self-hosted, windows, amd64, test]]
test-command: ['test-e2e-vm-serial', 'test-e2e-container']
runs-on: ${{ matrix.os }}
steps:
- run: echo "Skipping CI for docs & contrib files"
mdlint:
Expand Down
20 changes: 11 additions & 9 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ permissions:
id-token: write
contents: write

env:
DEBUG: ${{ secrets.ACTIONS_STEP_DEBUG }}

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
Expand Down Expand Up @@ -97,7 +100,7 @@ jobs:
- name: Run ShellCheck
uses: ludeeus/action-shellcheck@00cae500b08a931fb5698e11e79bfbd38e612a38 # 2.0.0
with:
version: v0.9.0
version: v0.9.0
continue-on-error: true
go-mod-tidy-check:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -128,8 +131,9 @@ jobs:
[self-hosted, macos, amd64, 13, test],
[self-hosted, macos, amd64, 14, test],
[self-hosted, macos, arm64, 13, test],
[self-hosted, macos, arm64, 14, test]
[self-hosted, macos, arm64, 14, test],
]
test-command: ['test-e2e-vm-serial', 'test-e2e-container']
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
Expand Down Expand Up @@ -173,16 +177,14 @@ jobs:
- run: |
git status
git clean -f -d
REGISTRY=${{ steps.vars.outputs.has_creds == true && env.REGISTRY || '' }} make test-e2e
REGISTRY=${{ steps.vars.outputs.has_creds == true && env.REGISTRY || '' }} make ${{ matrix.test-command }}
shell: zsh {0}
windows-e2e-tests:
strategy:
fail-fast: false
matrix:
os:
[
[self-hosted, windows, amd64, test],
]
os: [[self-hosted, windows, amd64, test]]
test-command: ['test-e2e-vm-serial', 'test-e2e-container']
runs-on: ${{ matrix.os }}
timeout-minutes: 180
steps:
Expand Down Expand Up @@ -236,10 +238,10 @@ jobs:
# set networking config option to allow for VM/container -> host communication
echo "[experimental]`nnetworkingMode=mirrored`nhostAddressLoopback=true" > C:\Users\Administrator\.wslconfig
git status
git clean -f -d
make test-e2e
make ${{ matrix.test-command }}
- name: Remove Finch VM and Clean Up Previous Environment
if: ${{ always() }}
run: |
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -328,10 +328,10 @@ test-unit:
# Container tests and VM tests can be run in any order, but they must be run sequentially.
# For more details, see the package-level comment of the e2e package.
.PHONY: test-e2e
test-e2e: test-e2e-vm-serial
test-e2e: test-e2e-vm-serial test-e2e-container

.PHONY: test-e2e-vm-serial
test-e2e-vm-serial: test-e2e-container
test-e2e-vm-serial:
go test -ldflags $(LDFLAGS) -timeout 2h ./e2e/vm -test.v -ginkgo.v -ginkgo.timeout=2h --installed="$(INSTALLED)"

.PHONY: test-e2e-container
Expand Down
2 changes: 2 additions & 0 deletions cmd/finch/virtual_machine_init.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,9 @@ func (iva *initVMAction) run() error {
iva.logger.Info("Initializing and starting Finch virtual machine...")
logs, err := limaCmd.CombinedOutput()
if err != nil {
iva.logger.SetFormatter(flog.TextWithoutTruncation)
iva.logger.Errorf("Finch virtual machine failed to start, debug logs:\n%s", logs)
iva.logger.SetFormatter(flog.Text)
// ignore error, this is to ensure that the disk mount doesn't linger after the VM fails to start
_ = iva.diskManager.DetachUserDataDisk()
return err
Expand Down
3 changes: 3 additions & 0 deletions cmd/finch/virtual_machine_init_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"testing"

"github.com/runfinch/finch/pkg/dependency"
"github.com/runfinch/finch/pkg/flog"
"github.com/runfinch/finch/pkg/mocks"

"github.com/golang/mock/gomock"
Expand Down Expand Up @@ -292,7 +293,9 @@ func TestInitVMAction_run(t *testing.T) {
mockBaseYamlFilePath, "--tty=false").Return(command)

logger.EXPECT().Info("Initializing and starting Finch virtual machine...")
logger.EXPECT().SetFormatter(flog.TextWithoutTruncation)
logger.EXPECT().Errorf("Finch virtual machine failed to start, debug logs:\n%s", logs)
logger.EXPECT().SetFormatter(flog.Text)
dm.EXPECT().DetachUserDataDisk().Return(nil)
},
},
Expand Down
2 changes: 2 additions & 0 deletions cmd/finch/virtual_machine_start.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ func (sva *startVMAction) run() error {
sva.logger.Info("Starting existing Finch virtual machine...")
logs, err := limaCmd.CombinedOutput()
if err != nil {
sva.logger.SetFormatter(flog.TextWithoutTruncation)
sva.logger.Errorf("Finch virtual machine failed to start, debug logs:\n%s", logs)
sva.logger.SetFormatter(flog.Text)
return err
}
sva.logger.Info("Finch virtual machine started successfully")
Expand Down
3 changes: 3 additions & 0 deletions cmd/finch/virtual_machine_start_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"testing"

"github.com/runfinch/finch/pkg/dependency"
"github.com/runfinch/finch/pkg/flog"
"github.com/runfinch/finch/pkg/mocks"

"github.com/golang/mock/gomock"
Expand Down Expand Up @@ -305,7 +306,9 @@ func TestStartVMAction_run(t *testing.T) {
lcc.EXPECT().CreateWithoutStdio("start", limaInstanceName).Return(command)

logger.EXPECT().Info("Starting existing Finch virtual machine...")
logger.EXPECT().SetFormatter(flog.TextWithoutTruncation)
logger.EXPECT().Errorf("Finch virtual machine failed to start, debug logs:\n%s", logs)
logger.EXPECT().SetFormatter(flog.Text)
},
},
}
Expand Down
13 changes: 10 additions & 3 deletions e2e/container/container_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"regexp"
"runtime"
"testing"
"time"

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
Expand All @@ -28,14 +29,20 @@ func TestContainer(t *testing.T) {
}

ginkgo.SynchronizedBeforeSuite(func() []byte {
command.New(o, "vm", "init").WithTimeoutInSeconds(600).Run()
command.New(o, "vm", "stop", "-f").WithoutCheckingExitCode().WithTimeoutInSeconds(30).Run()
time.Sleep(1 * time.Second)
command.New(o, "vm", "remove", "-f").WithoutCheckingExitCode().WithTimeoutInSeconds(20).Run()
time.Sleep(1 * time.Second)
command.New(o, "vm", "init").WithoutCheckingExitCode().WithTimeoutInSeconds(160).Run()
tests.SetupLocalRegistry(o)
return nil
}, func(bytes []byte) {})

ginkgo.SynchronizedAfterSuite(func() {
command.New(o, "vm", "stop").WithTimeoutInSeconds(90).Run()
command.New(o, "vm", "remove").WithTimeoutInSeconds(60).Run()
command.New(o, "vm", "stop", "-f").WithoutCheckingExitCode().WithTimeoutInSeconds(30).Run()
time.Sleep(1 * time.Second)
command.New(o, "vm", "remove", "-f").WithoutCheckingExitCode().WithTimeoutInSeconds(20).Run()
time.Sleep(1 * time.Second)
}, func() {})

ginkgo.Describe(description, func() {
Expand Down
8 changes: 7 additions & 1 deletion e2e/e2e.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,13 @@ func CreateOption() (*option.Option, error) {
subject = InstalledTestSubject
}

o, err := option.New([]string{subject})
args := []string{subject}
debug := os.Getenv("DEBUG")
if len(debug) != 0 {
args = append(args, "--debug")
}

o, err := option.New(args)
if err != nil {
return nil, fmt.Errorf("failed to initialize a testing option: %w", err)
}
Expand Down
16 changes: 9 additions & 7 deletions e2e/vm/additional_disk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ var testAdditionalDisk = func(o *option.Option, installed bool) {
ginkgo.It("Retains container user data after the VM is deleted", func() {
resetVM(o, installed)
resetDisks(o, installed)
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(600).Run()
command.New(o, virtualMachineRootCmd, "init").WithoutCheckingExitCode().WithTimeoutInSeconds(160).Run()
command.Run(o, "volume", "create", volumeName)
ginkgo.DeferCleanup(command.Run, o, "volume", "rm", volumeName)
command.Run(o, "network", "create", networkName)
Expand All @@ -38,12 +38,12 @@ var testAdditionalDisk = func(o *option.Option, installed bool) {
ginkgo.DeferCleanup(command.Run, o, "rm", "-f", containerName)

command.New(o, "stop", containerName).WithTimeoutInSeconds(30).Run()

time.Sleep(20 * time.Second)

command.New(o, virtualMachineRootCmd, "stop").WithTimeoutInSeconds(90).Run()
command.Run(o, virtualMachineRootCmd, "remove")
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(240).Run()
command.New(o, virtualMachineRootCmd, "stop").WithoutCheckingExitCode().WithTimeoutInSeconds(30).Run()
time.Sleep(1 * time.Second)
command.New(o, virtualMachineRootCmd, "remove").WithoutCheckingExitCode().WithTimeoutInSeconds(20).Run()
time.Sleep(1 * time.Second)
command.New(o, virtualMachineRootCmd, "init").WithoutCheckingExitCode().WithTimeoutInSeconds(160).Run()

imageOutput := command.StdoutAsLines(o, "images", "--format", "{{.Name}}")
gomega.Expect(imageOutput).Should(gomega.ContainElement(savedImage))
Expand All @@ -60,7 +60,9 @@ var testAdditionalDisk = func(o *option.Option, installed bool) {
gomega.Expect(networkOutput).Should(gomega.ContainElement(networkName))

command.Run(o, "start", containerName)
gomega.Expect(command.StdoutStr(o, "exec", containerName, "cat", "/tmp/test.txt")).
gomega.Eventually(command.StdoutStr(o, "exec", containerName, "cat", "/tmp/test.txt")).
WithTimeout(15 * time.Second).
WithPolling(1 * time.Second).
Should(gomega.Equal("foo"))
})
})
Expand Down
6 changes: 3 additions & 3 deletions e2e/vm/config_darwin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"github.com/lima-vm/lima/pkg/limayaml"
"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"github.com/onsi/gomega/gexec"
"github.com/runfinch/common-tests/command"
"github.com/runfinch/common-tests/option"
"gopkg.in/yaml.v3"
Expand All @@ -33,9 +32,10 @@ var testConfig = func(o *option.Option, installed bool) {
}

limaConfigFilePath := resetVM(o, installed)
resetDisks(o, installed)
writeFile(finchConfigFilePath, []byte("memory: 4GiB\ncpus: 6\nvmType: vz\nrosetta: false"))
initCmdSession := command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(600).Run()
gomega.Expect(initCmdSession).Should(gexec.Exit(0))
// vm init with VZ set sometimes takes 2 minutes just to convert the disk to raw
command.New(o, "vm", "init").WithoutCheckingExitCode().WithTimeoutInSeconds(240).Run()

gomega.Expect(limaConfigFilePath).Should(gomega.BeARegularFile())
cfgBuf, err := os.ReadFile(filepath.Clean(limaConfigFilePath))
Expand Down
3 changes: 3 additions & 0 deletions e2e/vm/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os"
"os/exec"
"path/filepath"
"time"

"github.com/lima-vm/lima/pkg/limayaml"
"github.com/onsi/ginkgo/v2"
Expand Down Expand Up @@ -42,6 +43,7 @@ func updateAndApplyConfig(o *option.Option, configBytes []byte) *gexec.Session {
writeFile(finchConfigFilePath, configBytes)

command.New(o, virtualMachineRootCmd, "stop").WithoutCheckingExitCode().WithTimeoutInSeconds(90).Run()
time.Sleep(1 * time.Second)
return command.New(o, virtualMachineRootCmd, "start").WithoutCheckingExitCode().WithTimeoutInSeconds(240).Run()
}

Expand Down Expand Up @@ -78,6 +80,7 @@ var _ = func(o *option.Option, installed bool) {
writeFile(limaConfigFilePath, origLimaCfg)

command.New(o, virtualMachineRootCmd, "stop").WithoutCheckingExitCode().WithTimeoutInSeconds(90).Run()
time.Sleep(1 * time.Second)
command.New(o, virtualMachineRootCmd, "start").WithTimeoutInSeconds(240).Run()
})
})
Expand Down
4 changes: 1 addition & 3 deletions e2e/vm/cred_helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (

"github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
"github.com/onsi/gomega/gexec"
"github.com/runfinch/common-tests/command"
"github.com/runfinch/common-tests/option"
)
Expand All @@ -33,8 +32,7 @@ var testCredHelper = func(o *option.Option, installed bool, registry string) {
resetDisks(o, installed)
writeFile(finchConfigFilePath, []byte(fmt.Sprintf("cpus: 6\nmemory: 4GiB\ncreds_helpers:\n "+
"- ecr-login\nvmType: %s\nrosetta: true", vmType)))
initCmdSession := command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(600).Run()
gomega.Expect(initCmdSession).Should(gexec.Exit(0))
command.New(o, virtualMachineRootCmd, "init").WithoutCheckingExitCode().WithTimeoutInSeconds(160).Run()
command.New(o, "pull", registry).WithTimeoutInSeconds(600).Run()
gomega.Expect(command.Stdout(o, "images", "-q", registry)).NotTo(gomega.BeEmpty())
})
Expand Down
22 changes: 14 additions & 8 deletions e2e/vm/lifecycle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ var testVMLifecycle = func(o *option.Option) {

ginkgo.It("should be able to force remove the virtual machine", func() {
command.Run(o, "images")
command.New(o, virtualMachineRootCmd, "remove", "--force").WithTimeoutInSeconds(60).Run()
command.New(o, virtualMachineRootCmd, "remove", "--force").WithTimeoutInSeconds(160).Run()
command.RunWithoutSuccessfulExit(o, "images")
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(600).Run()
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(160).Run()
})

ginkgo.It("should be able to stop the virtual machine", func() {
Expand All @@ -54,20 +54,26 @@ var testVMLifecycle = func(o *option.Option) {
gomega.Expect(command.StdoutStr(o, virtualMachineRootCmd, "status")).To(gomega.Equal("Stopped"))
})

ginkgo.It("should be able to start the virtual machine", func() {
command.New(o, virtualMachineRootCmd, "start").WithTimeoutInSeconds(240).Run()
ginkgo.It("should be able to start the virtual machine", ginkgo.FlakeAttempts(3), func() {
// TODO: Remove FlakeAttempts
// vm start should happen in around 20 seconds if everything is working as expected
// sometimes it fails, but the failure timeout is 1 minute. Clamping to 30 seconds and
// allowing 3 tries will still be faster than the previous behavior.
command.New(o, virtualMachineRootCmd, "start").WithTimeoutInSeconds(30).Run()
command.Run(o, "images")
command.New(o, virtualMachineRootCmd, "stop").WithTimeoutInSeconds(90).Run()
})

ginkgo.It("should be able to remove the virtual machine", func() {
command.New(o, virtualMachineRootCmd, "remove").WithTimeoutInSeconds(60).Run()
// don't asssume the VM will be in a stopped state (e.g. if the previous test fails)
command.New(o, virtualMachineRootCmd, "stop", "--force").WithTimeoutInSeconds(90).Run()
command.New(o, virtualMachineRootCmd, "remove").WithTimeoutInSeconds(160).Run()
command.RunWithoutSuccessfulExit(o, "images")
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(600).Run()
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(160).Run()
command.New(o, virtualMachineRootCmd, "stop").WithTimeoutInSeconds(90).Run()
})
ginkgo.It("should be able to force remove the virtual machine", func() {
command.New(o, virtualMachineRootCmd, "remove", "--force").WithTimeoutInSeconds(60).Run()
command.New(o, virtualMachineRootCmd, "remove", "--force").WithTimeoutInSeconds(160).Run()
command.RunWithoutSuccessfulExit(o, "images")
})
})
Expand All @@ -83,7 +89,7 @@ var testVMLifecycle = func(o *option.Option) {
})

ginkgo.It("should be able to init", func() {
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(600).Run()
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(160).Run()
command.Run(o, "images")
})
})
Expand Down
6 changes: 3 additions & 3 deletions e2e/vm/soci_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ var testSoci = func(o *option.Option, installed bool) {
resetDisks(o, installed)
writeFile(finchConfigFilePath, []byte(fmt.Sprintf("cpus: 6\nmemory: 4GiB\nsnapshotters:\n "+
"- soci\nvmType: %s\nrosetta: false", vmType)))
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(600).Run()
command.New(o, virtualMachineRootCmd, "init").WithoutCheckingExitCode().WithTimeoutInSeconds(160).Run()
command.New(o, "pull", "--snapshotter=soci", ffmpegSociImage).WithTimeoutInSeconds(30).Run()
finchPullMounts := countMounts(limactlO)
command.Run(o, "rmi", "-f", ffmpegSociImage)
Expand All @@ -81,7 +81,7 @@ var testSoci = func(o *option.Option, installed bool) {
resetDisks(o, installed)
writeFile(finchConfigFilePath, []byte(fmt.Sprintf("cpus: 6\nmemory: 4GiB\nsnapshotters:\n "+
"- soci\nvmType: %s\nrosetta: false", vmType)))
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(600).Run()
command.New(o, virtualMachineRootCmd, "init").WithoutCheckingExitCode().WithTimeoutInSeconds(160).Run()
command.New(o, "run", "--snapshotter=soci", ffmpegSociImage).WithTimeoutInSeconds(30).Run()
finchPullMounts := countMounts(limactlO)
command.Run(o, "rmi", "-f", ffmpegSociImage)
Expand All @@ -96,7 +96,7 @@ var testSoci = func(o *option.Option, installed bool) {
resetDisks(o, installed)
writeFile(finchConfigFilePath, []byte(fmt.Sprintf("cpus: 6\nmemory: 4GiB\nsnapshotters:\n "+
"- soci\nvmType: %s\nrosetta: false", vmType)))
command.New(o, virtualMachineRootCmd, "init").WithTimeoutInSeconds(600).Run()
command.New(o, virtualMachineRootCmd, "init").WithoutCheckingExitCode().WithTimeoutInSeconds(160).Run()
port = fnet.GetFreePort()
command.New(o, "run", "-dp", fmt.Sprintf("%d:5000", port), "--name", "registry", registryImage).
WithTimeoutInSeconds(30).Run()
Expand Down
Loading

0 comments on commit 673c2a5

Please sign in to comment.