From fb2aae58636ed920e1c399ded08994d510ba0c4b Mon Sep 17 00:00:00 2001 From: Guillermo Gaston Date: Fri, 7 Jun 2024 02:41:11 +0000 Subject: [PATCH] Add command to cleanup tinkerbell test resources --- cmd/integration_test/cmd/cleanuptinkerbell.go | 156 ++++++++++++++++++ internal/pkg/ssm/activation.go | 37 ++++- internal/pkg/ssm/instance.go | 41 ++++- internal/test/cleanup/cleanup.go | 89 ---------- internal/test/cleanup/tinkerbell.go | 100 +++++++++++ internal/test/e2e/run.go | 43 +---- .../e2e/{testRunner.go => test_runner.go} | 89 ++++++---- internal/test/e2e/tinkerbell.go | 77 +++++++++ pkg/executables/govc.go | 49 +++++- pkg/executables/govc_test.go | 59 +++++++ test/framework/tinkerbell.go | 2 +- 11 files changed, 564 insertions(+), 178 deletions(-) create mode 100644 cmd/integration_test/cmd/cleanuptinkerbell.go create mode 100644 internal/test/cleanup/tinkerbell.go rename internal/test/e2e/{testRunner.go => test_runner.go} (82%) diff --git a/cmd/integration_test/cmd/cleanuptinkerbell.go b/cmd/integration_test/cmd/cleanuptinkerbell.go new file mode 100644 index 0000000000000..2ce85427ad7c7 --- /dev/null +++ b/cmd/integration_test/cmd/cleanuptinkerbell.go @@ -0,0 +1,156 @@ +package cmd + +import ( + "context" + "fmt" + "log" + + "github.com/aws/aws-sdk-go/aws/session" + "github.com/spf13/cobra" + + "github.com/aws/eks-anywhere/internal/pkg/ssm" + "github.com/aws/eks-anywhere/internal/test/cleanup" + "github.com/aws/eks-anywhere/internal/test/e2e" + "github.com/aws/eks-anywhere/pkg/dependencies" + "github.com/aws/eks-anywhere/pkg/errors" + "github.com/aws/eks-anywhere/pkg/executables" + "github.com/aws/eks-anywhere/pkg/logger" + "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware" +) + +var cleanUpTinkerbellCmd = &cobra.Command{ + Use: "tinkerbell", + Short: "Clean up tinkerbell e2e resources", + Long: "Deletes vms created for e2e testing on vsphere and powers off metal machines", + SilenceUsage: true, + PreRun: preRunCleanUpNutanixSetup, + RunE: 
func(cmd *cobra.Command, _ []string) error { + return cleanUpTinkerbellTestResources(cmd.Context()) + }, +} + +var ( + storageBucket string + instanceConfig string + dryRun bool +) + +func init() { + cleanUpInstancesCmd.AddCommand(cleanUpTinkerbellCmd) + cleanUpTinkerbellCmd.Flags().StringVarP(&storageBucket, storageBucketFlagName, "s", "", "S3 bucket name where tinkerbell hardware inventory files are stored") + runE2ECmd.Flags().StringVar(&instanceConfig, instanceConfigFlagName, "", "File path to the instance-config.yml config") + cleanUpTinkerbellCmd.Flags().BoolVar(&dryRun, "dry-run", false, "Run command without deleting or powering off any resources") + + if err := cleanUpTinkerbellCmd.MarkFlagRequired(storageBucketFlagName); err != nil { + log.Fatalf("Error marking flag %s as required: %v", storageBucketFlagName, err) + } + + if err := cleanUpTinkerbellCmd.MarkFlagRequired(instanceConfigFlagName); err != nil { + log.Fatalf("Error marking flag %s as required: %v", instanceConfigFlagName, err) + } +} + +// cleanUpTinkerbellTestResources deletes any test runner vm in vsphere and powers off all metal machines. +func cleanUpTinkerbellTestResources(ctx context.Context) error { + session, err := session.NewSession() + if err != nil { + return fmt.Errorf("creating session: %w", err) + } + + deps, err := dependencies.NewFactory().WithGovc().Build(ctx) + if err != nil { + return err + } + defer deps.Close(ctx) + govc := deps.Govc + + infraConfig, err := e2e.ReadRunnerConfig(instanceConfig) + if err != nil { + return fmt.Errorf("reading vms config for tests: %v", err) + } + + govc.Configure( + executables.GovcConfig{ + Username: infraConfig.Username, + Password: infraConfig.Password, + URL: infraConfig.URL, + Insecure: infraConfig.Insecure, + Datacenter: infraConfig.Datacenter, + }, + ) + + var errs []error + + if err := deleteSSMInstances(ctx, session); len(err) != 0 { + errs = append(errs, err...) 
+ } + + if err := deleteRunners(ctx, govc, infraConfig.Folder); len(err) != 0 { + errs = append(errs, err...) + } + + if err := powerOffMachines(ctx, session); len(err) != 0 { + errs = append(errs, err...) + } + + return errors.NewAggregate(errs) +} + +func deleteSSMInstances(ctx context.Context, session *session.Session) []error { + var errs []error + if ssmInstances, err := e2e.ListTinkerbellSSMInstances(ctx, session); err != nil { + errs = append(errs, fmt.Errorf("listing ssm instances: %w", err)) + } else if dryRun { + logger.Info("Found SSM instances", "instanceIDs", ssmInstances.InstanceIDs, "activationIDs", ssmInstances.ActivationIDs) + } else { + if _, err := ssm.DeregisterInstances(session, ssmInstances.InstanceIDs...); err != nil { + errs = append(errs, fmt.Errorf("deleting ssm instances: %w", err)) + } + if _, err := ssm.DeleteActivations(session, ssmInstances.ActivationIDs...); err != nil { + errs = append(errs, fmt.Errorf("deleting ssm activations: %w", err)) + } + } + + return errs +} + +func deleteRunners(ctx context.Context, govc *executables.Govc, folder string) []error { + var errs []error + if runners, err := govc.ListVMs(ctx, folder); err != nil { + errs = append(errs, fmt.Errorf("listing tinkerbell runners: %w", err)) + } else if dryRun { + logger.Info("Found VM Runners", "vms", runners) + } else { + for _, vm := range runners { + if err := govc.DeleteVM(ctx, vm.Path); err != nil { + errs = append(errs, fmt.Errorf("deleting tinkerbell runner %s: %w", vm, err)) + } + } + } + + return errs +} + +func powerOffMachines(_ context.Context, session *session.Session) []error { + var errs []error + if machines, err := e2e.ReadTinkerbellMachinePool(session, storageBucket); err != nil { + errs = append(errs, fmt.Errorf("reading tinkerbell machine pool: %v", err)) + } else if dryRun { + logger.Info("Metal machine pool", "machines", names(machines)) + } else { + if err = cleanup.PowerOffTinkerbellMachines(machines, true); err != nil { + errs = append(errs, 
fmt.Errorf("powering off tinkerbell machines: %v", err)) + } + } + + return errs +} + +func names(h []*hardware.Machine) []string { + names := make([]string, 0, len(h)) + for _, m := range h { + names = append(names, m.Hostname) + } + + return names +} diff --git a/internal/pkg/ssm/activation.go b/internal/pkg/ssm/activation.go index b92ca48d47079..2d1f0089d0eac 100644 --- a/internal/pkg/ssm/activation.go +++ b/internal/pkg/ssm/activation.go @@ -3,6 +3,7 @@ package ssm import ( "fmt" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ssm" ) @@ -12,7 +13,14 @@ type ActivationInfo struct { ActivationID string } -func CreateActivation(session *session.Session, instanceName, role string) (*ActivationInfo, error) { +// Tag is an SSM tag. +type Tag struct { + Key string + Value string +} + +// CreateActivation creates an SSM Hybrid activation. +func CreateActivation(session *session.Session, instanceName, role string, tags ...Tag) (*ActivationInfo, error) { s := ssm.New(session) request := ssm.CreateActivationInput{ @@ -21,6 +29,12 @@ func CreateActivation(session *session.Session, instanceName, role string) (*Act IamRole: &role, } + for _, tag := range tags { + request.Tags = append(request.Tags, + &ssm.Tag{Key: aws.String(tag.Key), Value: aws.String(tag.Value)}, + ) + } + result, err := s.CreateActivation(&request) if err != nil { return nil, fmt.Errorf("failed to activate ssm instance %s: %v", instanceName, err) @@ -29,17 +43,22 @@ func CreateActivation(session *session.Session, instanceName, role string) (*Act return &ActivationInfo{ActivationCode: *result.ActivationCode, ActivationID: *result.ActivationId}, nil } -func DeleteActivation(session *session.Session, activationId string) (*ssm.DeleteActivationOutput, error) { +// DeleteActivations deletes SSM activations. 
+func DeleteActivations(session *session.Session, ids ...string) ([]*ssm.DeleteActivationOutput, error) { s := ssm.New(session) + var outputs []*ssm.DeleteActivationOutput + for _, id := range ids { + request := ssm.DeleteActivationInput{ + ActivationId: &id, + } - request := ssm.DeleteActivationInput{ - ActivationId: &activationId, - } + result, err := s.DeleteActivation(&request) + if err != nil { + return nil, fmt.Errorf("failed to delete ssm activation: %v", err) + } - result, err := s.DeleteActivation(&request) - if err != nil { - return nil, fmt.Errorf("failed to delete ssm activation: %v", err) + outputs = append(outputs, result) } - return result, nil + return outputs, nil } diff --git a/internal/pkg/ssm/instance.go b/internal/pkg/ssm/instance.go index 6fd2b4b80ae24..d04d90a071b3d 100644 --- a/internal/pkg/ssm/instance.go +++ b/internal/pkg/ssm/instance.go @@ -1,8 +1,10 @@ package ssm import ( + "context" "fmt" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ssm" ) @@ -30,16 +32,43 @@ func GetInstanceByActivationId(session *session.Session, id string) (*ssm.Instan return infoList[0], nil } -func DeregisterInstance(session *session.Session, id string) (*ssm.DeregisterManagedInstanceOutput, error) { +// DeregisterInstances deregisters SSM instances. 
+func DeregisterInstances(session *session.Session, ids ...string) ([]*ssm.DeregisterManagedInstanceOutput, error) { s := ssm.New(session) - input := ssm.DeregisterManagedInstanceInput{ - InstanceId: &id, + var outputs []*ssm.DeregisterManagedInstanceOutput + for _, id := range ids { + input := ssm.DeregisterManagedInstanceInput{ + InstanceId: &id, + } + + output, err := s.DeregisterManagedInstance(&input) + if err != nil { + return nil, fmt.Errorf("failed to deregister ssm instance %s: %v", id, err) + } + + outputs = append(outputs, output) } - output, err := s.DeregisterManagedInstance(&input) + return outputs, nil +} + +func ListInstancesByTags(ctx context.Context, session *session.Session, tags ...Tag) ([]*ssm.InstanceInformation, error) { + s := ssm.New(session) + input := ssm.DescribeInstanceInformationInput{ + Filters: make([]*ssm.InstanceInformationStringFilter, 0, len(tags)), + } + + for _, tag := range tags { + input.Filters = append(input.Filters, &ssm.InstanceInformationStringFilter{ + Key: aws.String("tag:" + tag.Key), + Values: aws.StringSlice([]string{tag.Value}), + }) + } + + output, err := s.DescribeInstanceInformation(&input) if err != nil { - return nil, fmt.Errorf("failed to deregister ssm instance %s: %v", id, err) + return nil, fmt.Errorf("listing ssm instances by tags: %v", err) } - return output, nil + return output.InstanceInformationList, nil } diff --git a/internal/test/cleanup/cleanup.go b/internal/test/cleanup/cleanup.go index 7423faaea3bf6..50a2e0dd67923 100644 --- a/internal/test/cleanup/cleanup.go +++ b/internal/test/cleanup/cleanup.go @@ -5,25 +5,19 @@ import ( "fmt" "os" "strconv" - "strings" "time" "github.com/aws/aws-sdk-go/aws/session" - "github.com/bmc-toolbox/bmclib/v2" - "github.com/go-logr/logr" prismgoclient "github.com/nutanix-cloud-native/prism-go-client" v3 "github.com/nutanix-cloud-native/prism-go-client/v3" - "github.com/aws/eks-anywhere/internal/pkg/api" "github.com/aws/eks-anywhere/internal/pkg/ec2" 
"github.com/aws/eks-anywhere/internal/pkg/s3" - "github.com/aws/eks-anywhere/pkg/errors" "github.com/aws/eks-anywhere/pkg/executables" "github.com/aws/eks-anywhere/pkg/filewriter" "github.com/aws/eks-anywhere/pkg/logger" "github.com/aws/eks-anywhere/pkg/providers/cloudstack/decoder" "github.com/aws/eks-anywhere/pkg/providers/nutanix" - "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware" "github.com/aws/eks-anywhere/pkg/retrier" "github.com/aws/eks-anywhere/pkg/validations" ) @@ -195,86 +189,3 @@ func NutanixTestResources(clusterName, endpoint, port string, insecure, ignoreEr } return nil } - -// TinkerbellTestResources cleans up machines by powering them down. -func TinkerbellTestResources(inventoryCSVFilePath string, ignoreErrors bool) error { - hardwarePool, err := api.NewHardwareMapFromFile(inventoryCSVFilePath) - if err != nil { - return fmt.Errorf("failed to create hardware map from inventory csv: %v", err) - } - - logger.Info("Powering off hardware: %+v", hardwarePool) - return powerOffHardwarePool(hardwarePool, ignoreErrors) -} - -func powerOffHardwarePool(hardware map[string]*hardware.Machine, ignoreErrors bool) error { - errList := []error{} - for _, h := range hardware { - if err := powerOffHardware(h, ignoreErrors); err != nil { - errList = append(errList, err) - } - } - - if len(errList) > 0 { - return fmt.Errorf("failed to power off %d hardware: %+v", len(errList), errors.NewAggregate(errList)) - } - - return nil -} - -func powerOffHardware(h *hardware.Machine, ignoreErrors bool) (reterror error) { - ctx, done := context.WithTimeout(context.Background(), 2*time.Minute) - defer done() - bmcClient := newBmclibClient(logr.Discard(), h.BMCIPAddress, h.BMCUsername, h.BMCPassword) - - if err := bmcClient.Open(ctx); err != nil { - md := bmcClient.GetMetadata() - logger.Info("Warning: Failed to open connection to BMC: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, 
md.SuccessfulOpenConns) - return handlePowerOffHardwareError(err, ignoreErrors) - } - - md := bmcClient.GetMetadata() - logger.Info("Connected to BMC: hardware: %v, providersAttempted: %v, successfulProvider: %v", h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) - - defer func() { - if err := bmcClient.Close(ctx); err != nil { - md := bmcClient.GetMetadata() - logger.Info("Warning: BMC close connection failed: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.FailedProviderDetail) - reterror = handlePowerOffHardwareError(err, ignoreErrors) - } - }() - - state, err := bmcClient.GetPowerState(ctx) - if err != nil { - state = "unknown" - } - if strings.Contains(strings.ToLower(state), "off") { - return nil - } - - if _, err := bmcClient.SetPowerState(ctx, "off"); err != nil { - md := bmcClient.GetMetadata() - logger.Info("Warning: failed to power off hardware: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) - return handlePowerOffHardwareError(err, ignoreErrors) - } - - return nil -} - -func handlePowerOffHardwareError(err error, ignoreErrors bool) error { - if err != nil && !ignoreErrors { - return err - } - return nil -} - -// newBmclibClient creates a new BMClib client. -func newBmclibClient(log logr.Logger, hostIP, username, password string) *bmclib.Client { - o := []bmclib.Option{} - log = log.WithValues("host", hostIP, "username", username) - o = append(o, bmclib.WithLogger(log)) - client := bmclib.NewClient(hostIP, username, password, o...) 
- client.Registry.Drivers = client.Registry.PreferProtocol("redfish") - - return client -} diff --git a/internal/test/cleanup/tinkerbell.go b/internal/test/cleanup/tinkerbell.go new file mode 100644 index 0000000000000..c51807e61d9a1 --- /dev/null +++ b/internal/test/cleanup/tinkerbell.go @@ -0,0 +1,100 @@ +package cleanup + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/bmc-toolbox/bmclib/v2" + "github.com/go-logr/logr" + + "github.com/aws/eks-anywhere/internal/pkg/api" + "github.com/aws/eks-anywhere/pkg/errors" + "github.com/aws/eks-anywhere/pkg/logger" + "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware" +) + +// PowerOffTinkerbellMachinesFromFile cleans up machines by powering them down. +func PowerOffTinkerbellMachinesFromFile(inventoryCSVFilePath string, ignoreErrors bool) error { + hardwarePool, err := api.ReadTinkerbellHardwareFromFile(inventoryCSVFilePath) + if err != nil { + return fmt.Errorf("failed to create hardware map from inventory csv: %v", err) + } + + logger.Info("Powering off hardware: %+v", hardwarePool) + return PowerOffTinkerbellMachines(hardwarePool, ignoreErrors) +} + +// PowerOffTinkerbellMachines powers off machines. 
+func PowerOffTinkerbellMachines(hardware []*hardware.Machine, ignoreErrors bool) error { + errList := []error{} + for _, h := range hardware { + if err := powerOffTinkerbellMachine(h, ignoreErrors); err != nil { + errList = append(errList, err) + } + } + + if len(errList) > 0 { + return fmt.Errorf("failed to power off %d hardware: %+v", len(errList), errors.NewAggregate(errList)) + } + + return nil +} + +func powerOffTinkerbellMachine(h *hardware.Machine, ignoreErrors bool) (reterror error) { + ctx, done := context.WithTimeout(context.Background(), 2*time.Minute) + defer done() + bmcClient := newBmclibClient(logr.Discard(), h.BMCIPAddress, h.BMCUsername, h.BMCPassword) + + if err := bmcClient.Open(ctx); err != nil { + md := bmcClient.GetMetadata() + logger.Info("Warning: Failed to open connection to BMC: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) + return handlePowerOffHardwareError(err, ignoreErrors) + } + + md := bmcClient.GetMetadata() + logger.Info("Connected to BMC: hardware: %v, providersAttempted: %v, successfulProvider: %v", h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) + + defer func() { + if err := bmcClient.Close(ctx); err != nil { + md := bmcClient.GetMetadata() + logger.Info("Warning: BMC close connection failed: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.FailedProviderDetail) + reterror = handlePowerOffHardwareError(err, ignoreErrors) + } + }() + + state, err := bmcClient.GetPowerState(ctx) + if err != nil { + state = "unknown" + } + if strings.Contains(strings.ToLower(state), "off") { + return nil + } + + if _, err := bmcClient.SetPowerState(ctx, "off"); err != nil { + md := bmcClient.GetMetadata() + logger.Info("Warning: failed to power off hardware: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, 
md.SuccessfulOpenConns) + return handlePowerOffHardwareError(err, ignoreErrors) + } + + return nil +} + +func handlePowerOffHardwareError(err error, ignoreErrors bool) error { + if err != nil && !ignoreErrors { + return err + } + return nil +} + +// newBmclibClient creates a new BMClib client. +func newBmclibClient(log logr.Logger, hostIP, username, password string) *bmclib.Client { + o := []bmclib.Option{} + log = log.WithValues("host", hostIP, "username", username) + o = append(o, bmclib.WithLogger(log)) + client := bmclib.NewClient(hostIP, username, password, o...) + client.Registry.Drivers = client.Registry.PreferProtocol("redfish") + + return client +} diff --git a/internal/test/e2e/run.go b/internal/test/e2e/run.go index c2870bc8fa501..2d64b55f3f18c 100644 --- a/internal/test/e2e/run.go +++ b/internal/test/e2e/run.go @@ -13,7 +13,6 @@ import ( "github.com/go-logr/logr" "github.com/aws/eks-anywhere/internal/pkg/api" - "github.com/aws/eks-anywhere/internal/pkg/s3" "github.com/aws/eks-anywhere/internal/pkg/ssm" "github.com/aws/eks-anywhere/pkg/networkutils" e2etest "github.com/aws/eks-anywhere/test/e2e" @@ -256,7 +255,7 @@ func RunTests(conf *instanceRunConf) (testInstanceID string, testCommandResult * return "", nil, err } - if err := conf.reserveHardware(); err != nil { + if err = conf.reserveHardware(); err != nil { return "", nil, err } defer conf.releaseHardware() @@ -479,7 +478,7 @@ func nonAirgappedTinkerbellRunConfs(testsList []string, conf ParallelRunConf, ip if err != nil { return nil, err } - hardware, err := getNonAirgappedHardwarePool(conf.StorageBucket) + hardware, err := nonAirgappedHardwarePool(conf.session, conf.StorageBucket) if err != nil { return nil, fmt.Errorf("failed to get non-airgapped hardware inventory for Tinkerbell Tests: %v", err) } @@ -517,7 +516,7 @@ func airgappedTinkerbellRunConfs(testsList []string, conf ParallelRunConf, ipMan return nil, err } - hardware, err := getAirgappedHardwarePool(conf.StorageBucket) + hardware, err := 
airgappedHardwarePool(conf.session, conf.StorageBucket) if err != nil { return nil, fmt.Errorf("failed to get airgapped hardware inventory for Tinkerbell Tests: %v", err) } @@ -593,42 +592,6 @@ func logTestGroups(logger logr.Logger, instancesConf []*instanceRunConf) { logger.V(1).Info("Running tests in parallel", "testsGroups", testGroups) } -func getNonAirgappedHardwarePool(storageBucket string) ([]*api.Hardware, error) { - awsSession, err := session.NewSession() - if err != nil { - return nil, fmt.Errorf("creating aws session for tests: %v", err) - } - err = s3.DownloadToDisk(awsSession, os.Getenv(tinkerbellHardwareS3FileKeyEnvVar), storageBucket, e2eHardwareCsvFilePath) - if err != nil { - return nil, fmt.Errorf("failed to download tinkerbell hardware csv: %v", err) - } - - hardware, err := api.ReadTinkerbellHardwareFromFile(e2eHardwareCsvFilePath) - if err != nil { - return nil, fmt.Errorf("failed to get Tinkerbell hardware: %v", err) - } - return hardware, nil -} - -// Airgapped tinkerbell tests have special hardware requirements that doesn't have internet connectivity. 
-func getAirgappedHardwarePool(storageBucket string) ([]*api.Hardware, error) { - awsSession, err := session.NewSession() - if err != nil { - return nil, fmt.Errorf("creating aws session for tests: %v", err) - } - err = s3.DownloadToDisk(awsSession, os.Getenv(tinkerbellAirgappedHardwareS3FileKeyEnvVar), storageBucket, e2eAirgappedHardwareCsvFilePath) - if err != nil { - return nil, fmt.Errorf("downloading tinkerbell airgapped hardware csv: %v", err) - } - - hardware, err := api.ReadTinkerbellHardwareFromFile(e2eAirgappedHardwareCsvFilePath) - if err != nil { - return nil, fmt.Errorf("failed to get Tinkerbell hardware: %v", err) - } - - return hardware, nil -} - func logTinkerbellTestHardwareInfo(conf *instanceRunConf, action string) { var hardwareInfo []string for _, hardware := range conf.Hardware { diff --git a/internal/test/e2e/testRunner.go b/internal/test/e2e/test_runner.go similarity index 82% rename from internal/test/e2e/testRunner.go rename to internal/test/e2e/test_runner.go index ee4e881af78d0..62c5fa9a3188e 100644 --- a/internal/test/e2e/testRunner.go +++ b/internal/test/e2e/test_runner.go @@ -49,12 +49,8 @@ const ( func newTestRunner(runnerType TestRunnerType, config TestInfraConfig) (TestRunner, error) { if runnerType == VSphereTestRunnerType { - var err error v := &config.VSphereTestRunner - v.envMap, err = v.setEnvironment() - if err != nil { - return nil, fmt.Errorf("failed to set env for vSphere test runner: %v", err) - } + v.setEnvironment() return v, nil } else { return &config.Ec2TestRunner, nil @@ -72,7 +68,10 @@ func NewTestRunnerConfigFromFile(logger logr.Logger, configFile string) (*TestIn return nil, fmt.Errorf("failed to create test runner config from file: %v", err) } - config := TestInfraConfig{} + config, err := ReadRunnerConfig(configFile) + if err != nil { + return nil, fmt.Errorf("failed to create test runner config from file: %v", err) + } config.VSphereTestRunner.logger = logger config.Ec2TestRunner.logger = logger @@ -81,7 +80,35 
@@ func NewTestRunnerConfigFromFile(logger logr.Logger, configFile string) (*TestIn return nil, fmt.Errorf("failed to create test runner config from file: %v", err) } - return &config, nil + return config, nil +} + +// ReadRunnerConfig reads the runner config from the given file. +func ReadRunnerConfig(configFile string) (*TestInfraConfig, error) { + file, err := os.ReadFile(configFile) + if err != nil { + return nil, fmt.Errorf("reading runner config: %w", err) + } + + config := &TestInfraConfig{} + err = yaml.Unmarshal(file, config) + if err != nil { + return nil, fmt.Errorf("unmarshalling runner config: %v", err) + } + + if vSphereUsername, ok := os.LookupEnv(testRunnerVCUserEnvVar); ok && len(vSphereUsername) > 0 { + config.VSphereTestRunner.Username = vSphereUsername + } else { + return nil, fmt.Errorf("missing environment variable: %s", testRunnerVCUserEnvVar) + } + + if vSpherePassword, ok := os.LookupEnv(testRunnerVCPasswordEnvVar); ok && len(vSpherePassword) > 0 { + config.VSphereTestRunner.Password = vSpherePassword + } else { + return nil, fmt.Errorf("missing environment variable: %s", testRunnerVCPasswordEnvVar) + } + + return config, nil } type testRunner struct { @@ -99,7 +126,8 @@ type VSphereTestRunner struct { testRunner ActivationId string envMap map[string]string - Url string `yaml:"url"` + + URL string `yaml:"url"` Insecure bool `yaml:"insecure"` Library string `yaml:"library"` Template string `yaml:"template"` @@ -108,34 +136,28 @@ type VSphereTestRunner struct { ResourcePool string `yaml:"resourcePool"` Network string `yaml:"network"` Folder string `yaml:"folder"` + Username string + Password string } -func (v *VSphereTestRunner) setEnvironment() (map[string]string, error) { - envMap := make(map[string]string) - if vSphereUsername, ok := os.LookupEnv(testRunnerVCUserEnvVar); ok && len(vSphereUsername) > 0 { - envMap[govcUsernameKey] = vSphereUsername - } else { - return nil, fmt.Errorf("missing environment variable: %s", 
testRunnerVCUserEnvVar) - } - - if vSpherePassword, ok := os.LookupEnv(testRunnerVCPasswordEnvVar); ok && len(vSpherePassword) > 0 { - envMap[govcPasswordKey] = vSpherePassword - } else { - return nil, fmt.Errorf("missing environment variable: %s", testRunnerVCPasswordEnvVar) +func (v *VSphereTestRunner) setEnvironment() { + v.envMap = map[string]string{ + govcUsernameKey: v.Username, + govcPasswordKey: v.Password, + govcURLKey: v.URL, + govcInsecure: strconv.FormatBool(v.Insecure), + govcDatacenterKey: v.Datacenter, } - - envMap[govcURLKey] = v.Url - envMap[govcInsecure] = strconv.FormatBool(v.Insecure) - envMap[govcDatacenterKey] = v.Datacenter - - v.envMap = envMap - return envMap, nil } func (v *VSphereTestRunner) createInstance(c *instanceRunConf) (string, error) { name := getTestRunnerName(v.logger, c.JobID) - ssmActivationInfo, err := ssm.CreateActivation(c.Session, name, c.InstanceProfileName) + ssmActivationInfo, err := ssm.CreateActivation( + // It's important to add the tinkerbell job tag since that's what we use to then search + // for lingering activations and instances to clean up. 
+ c.Session, name, c.InstanceProfileName, ssm.Tag{tinkerbellJobTag, c.JobID}, + ) if err != nil { return "", fmt.Errorf("unable to create ssm activation: %v", err) } @@ -199,7 +221,12 @@ func (e *Ec2TestRunner) createInstance(c *instanceRunConf) (string, error) { func (v *VSphereTestRunner) tagInstance(c *instanceRunConf, key, value string) error { vmName := getTestRunnerName(v.logger, c.JobID) vmPath := fmt.Sprintf("/%s/vm/%s/%s", v.Datacenter, v.Folder, vmName) - tag := fmt.Sprintf("%s:%s", key, value) + var tag string + if value != "" { + tag = fmt.Sprintf("%s:%s", key, value) + } else { + tag = key + } if err := vsphere.TagVirtualMachine(v.envMap, vmPath, tag); err != nil { return fmt.Errorf("failed to tag vSphere test runner: %v", err) @@ -216,8 +243,8 @@ func (e *Ec2TestRunner) tagInstance(c *instanceRunConf, key, value string) error } func (v *VSphereTestRunner) decommInstance(c *instanceRunConf) error { - _, deregisterError := ssm.DeregisterInstance(c.Session, v.InstanceID) - _, deactivateError := ssm.DeleteActivation(c.Session, v.ActivationId) + _, deregisterError := ssm.DeregisterInstances(c.Session, v.InstanceID) + _, deactivateError := ssm.DeleteActivations(c.Session, v.ActivationId) deleteError := cleanup.VsphereRmVms(context.Background(), getTestRunnerName(v.logger, c.JobID), executables.WithGovcEnvMap(v.envMap)) if deregisterError != nil { diff --git a/internal/test/e2e/tinkerbell.go b/internal/test/e2e/tinkerbell.go index 796cf449841b4..deb6219f4486a 100644 --- a/internal/test/e2e/tinkerbell.go +++ b/internal/test/e2e/tinkerbell.go @@ -1,13 +1,16 @@ package e2e import ( + "context" "fmt" "os" "regexp" + "github.com/aws/aws-sdk-go/aws/session" "github.com/go-logr/logr" "github.com/aws/eks-anywhere/internal/pkg/api" + "github.com/aws/eks-anywhere/internal/pkg/s3" "github.com/aws/eks-anywhere/internal/pkg/ssm" e2etests "github.com/aws/eks-anywhere/test/framework" ) @@ -24,6 +27,9 @@ const ( tinkerbellDefaultMaxHardwarePerE2ETest = 4 
tinkerbellBootstrapInterfaceEnvVar = "T_TINKERBELL_BOOTSTRAP_INTERFACE" tinkerbellCIEnvironmentEnvVar = "T_TINKERBELL_CI_ENVIRONMENT" + + // tinkerbellJobTag is the tag used to map vm runners and SSM activations to an e2e job. + tinkerbellJobTag = "eksa-tinkerbell-e2e-job" ) // TinkerbellTest maps each Tinkbell test with the hardware count needed for the test. @@ -125,3 +131,74 @@ func getTinkerbellAirgappedTests(tests []string) []string { } return tinkerbellTests } + +// ReadTinkerbellMachinePool returns the list of baremetal machines designated for e2e tests. +func ReadTinkerbellMachinePool(session *session.Session, bucketName string) ([]*api.Hardware, error) { + hardware := []*api.Hardware{} + machines, err := nonAirgappedHardwarePool(session, bucketName) + if err != nil { + return nil, err + } + hardware = append(hardware, machines...) + + machines, err = airgappedHardwarePool(session, bucketName) + if err != nil { + return nil, err + } + hardware = append(hardware, machines...) + + return hardware, nil +} + +func nonAirgappedHardwarePool(session *session.Session, storageBucket string) ([]*api.Hardware, error) { + err := s3.DownloadToDisk(session, os.Getenv(tinkerbellHardwareS3FileKeyEnvVar), storageBucket, e2eHardwareCsvFilePath) + if err != nil { + return nil, fmt.Errorf("failed to download tinkerbell hardware csv: %v", err) + } + + hardware, err := api.ReadTinkerbellHardwareFromFile(e2eHardwareCsvFilePath) + if err != nil { + return nil, fmt.Errorf("failed to get Tinkerbell hardware: %v", err) + } + return hardware, nil +} + +// airgappedHardwarePool returns the hardware pool for airgapped tinkerbell tests. +// Airgapped tinkerbell tests require special hardware that doesn't have internet connectivity. 
+func airgappedHardwarePool(session *session.Session, storageBucket string) ([]*api.Hardware, error) { + err := s3.DownloadToDisk(session, os.Getenv(tinkerbellAirgappedHardwareS3FileKeyEnvVar), storageBucket, e2eAirgappedHardwareCsvFilePath) + if err != nil { + return nil, fmt.Errorf("downloading tinkerbell airgapped hardware csv: %v", err) + } + + hardware, err := api.ReadTinkerbellHardwareFromFile(e2eAirgappedHardwareCsvFilePath) + if err != nil { + return nil, fmt.Errorf("failed to get Tinkerbell hardware: %v", err) + } + + return hardware, nil +} + +type TinkerbellSSMInstances struct { + // InstanceIDs is a list of SSM instance IDs created for the vm runners. + InstanceIDs []string + // ActivationIDs is a list of SSM activation IDs created for the vm runners. + ActivationIDs []string +} + +// ListTinkerbellSSMInstances returns a list of SSM instances created for the tinkerbell vm runners. +func ListTinkerbellSSMInstances(ctx context.Context, session *session.Session) (*TinkerbellSSMInstances, error) { + runnerInstances := &TinkerbellSSMInstances{} + + instances, err := ssm.ListInstancesByTags(ctx, session, ssm.Tag{Key: tinkerbellJobTag, Value: "*"}) + if err != nil { + return nil, fmt.Errorf("listing tinkerbell runners: %v", err) + } + + for _, instance := range instances { + runnerInstances.ActivationIDs = append(runnerInstances.ActivationIDs, *instance.ActivationId) + runnerInstances.InstanceIDs = append(runnerInstances.InstanceIDs, *instance.InstanceId) + } + + return runnerInstances, nil +} diff --git a/pkg/executables/govc.go b/pkg/executables/govc.go index 6f3c653ae224f..628ced5e02e5a 100644 --- a/pkg/executables/govc.go +++ b/pkg/executables/govc.go @@ -19,6 +19,7 @@ import ( "sigs.k8s.io/yaml" "github.com/aws/eks-anywhere/pkg/api/v1alpha1" + "github.com/aws/eks-anywhere/pkg/clients/vsphere" "github.com/aws/eks-anywhere/pkg/config" "github.com/aws/eks-anywhere/pkg/filewriter" "github.com/aws/eks-anywhere/pkg/logger" @@ -98,6 +99,26 @@ func 
WithGovcEnvMap(envMap map[string]string) GovcOpt { } } +type GovcConfig struct { + Username string + Password string + URL string + Insecure bool + Datacenter string +} + +// Configure sets up the govc executable with the provided configuration. +// This is not thread safe. +func (g *Govc) Configure(config GovcConfig) { + g.envMap = map[string]string{ + govcUsernameKey: config.Username, + govcPasswordKey: config.Password, + govcURLKey: config.URL, + govcInsecure: strconv.FormatBool(config.Insecure), + govcDatacenterKey: config.Datacenter, + } +} + func (g *Govc) exec(ctx context.Context, args ...string) (stdout bytes.Buffer, err error) { envMap, err := g.validateAndSetupCreds() if err != nil { @@ -503,7 +524,7 @@ func (g *Govc) DeleteTemplate(ctx context.Context, resourcePool, templatePath st if err := g.removeSnapshotsFromVM(ctx, templatePath); err != nil { return err } - if err := g.deleteVM(ctx, templatePath); err != nil { + if err := g.DeleteVM(ctx, templatePath); err != nil { return err } @@ -524,7 +545,7 @@ func (g *Govc) removeSnapshotsFromVM(ctx context.Context, path string) error { return nil } -func (g *Govc) deleteVM(ctx context.Context, path string) error { +func (g *Govc) DeleteVM(ctx context.Context, path string) error { if _, err := g.exec(ctx, "vm.destroy", path); err != nil { return fmt.Errorf("deleting vm: %v", err) } @@ -1218,3 +1239,27 @@ func getValueFromString(str string) (int, error) { } return numValue, nil } + +type vmsResponse struct { + Elements []vsphere.VM `json:"elements"` +} + +// ListVMs returns the list of VMs in the provided folder. +func (g *Govc) ListVMs(ctx context.Context, folder string) ([]vsphere.VM, error) { + vmsOutput, err := g.exec(ctx, "ls", "-t", "VirtualMachine", "-json", folder) + if err != nil { + return nil, fmt.Errorf("govc returned error when listing vms: %w", err) + } + + vmsJson := vmsOutput.String() + if vmsJson == "null" || vmsJson == "" { + return nil, nil + } + + vms := &vmsResponse{} + if err = 
json.Unmarshal([]byte(vmsJson), vms); err != nil { + return nil, fmt.Errorf("failed unmarshalling govc response from list vms: %w", err) + } + + return vms.Elements, nil +} diff --git a/pkg/executables/govc_test.go b/pkg/executables/govc_test.go index f3c03109167e1..2d3bb9b090ad5 100644 --- a/pkg/executables/govc_test.go +++ b/pkg/executables/govc_test.go @@ -20,6 +20,7 @@ import ( "github.com/aws/eks-anywhere/internal/test" "github.com/aws/eks-anywhere/pkg/api/v1alpha1" + "github.com/aws/eks-anywhere/pkg/clients/vsphere" "github.com/aws/eks-anywhere/pkg/executables" mockexecutables "github.com/aws/eks-anywhere/pkg/executables/mocks" "github.com/aws/eks-anywhere/pkg/retrier" @@ -1722,3 +1723,61 @@ func TestGovcGetResourcePoolInfo(t *testing.T) { }) } } + +func TestListVMs(t *testing.T) { + testCases := []struct { + name string + folder string + vsphereResponse string + want []vsphere.VM + }{ + { + name: "null response", + folder: "my-vms", + vsphereResponse: `null`, + want: nil, + }, + { + name: "empty response", + folder: "my-vms", + vsphereResponse: `null`, + want: nil, + }, + { + name: "some vms", + folder: "my-vms", + vsphereResponse: `{ + "elements": [ + { + "name": "vm1", + "path": "/SDDC-Datacenter/vm/my-vms/vm1" + }, + { + "name": "vm2", + "path": "/SDDC-Datacenter/vm/my-vms/vm2" + } + ] +}`, + want: []vsphere.VM{ + { + Path: "/SDDC-Datacenter/vm/my-vms/vm1", + }, + { + Path: "/SDDC-Datacenter/vm/my-vms/vm2", + }, + }, + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + g := NewWithT(t) + + _, govc, executable, env := setup(t) + executable.EXPECT().ExecuteWithEnv(ctx, env, "ls", "-t", "VirtualMachine", "-json", tc.folder).Return(*bytes.NewBufferString(tc.vsphereResponse), nil) + + // g.Expect(err).NotTo(HaveOccurred()) + g.Expect(govc.ListVMs(ctx, tc.folder)).To(BeComparableTo(tc.want)) + }) + } +} diff --git a/test/framework/tinkerbell.go b/test/framework/tinkerbell.go index 
1362ac3688e57..7ef7d8dc61c4e 100644 --- a/test/framework/tinkerbell.go +++ b/test/framework/tinkerbell.go @@ -146,7 +146,7 @@ func (t *Tinkerbell) WithProviderUpgrade(fillers ...api.TinkerbellFiller) Cluste // CleanupResources runs a clean up the Tinkerbell machines which simply powers them down. func (t *Tinkerbell) CleanupResources(_ string) error { - return cleanup.TinkerbellTestResources(t.inventoryCsvFilePath, true) + return cleanup.PowerOffTinkerbellMachinesFromFile(t.inventoryCsvFilePath, true) } // WithKubeVersionAndOS returns a cluster config filler that sets the cluster kube version and the right image for all