Skip to content

Commit

Permalink
Add command to cleanup tinkerbell test resources
Browse files Browse the repository at this point in the history
  • Loading branch information
g-gaston committed Jul 2, 2024
1 parent 46e3955 commit fb2aae5
Show file tree
Hide file tree
Showing 11 changed files with 564 additions and 178 deletions.
156 changes: 156 additions & 0 deletions cmd/integration_test/cmd/cleanuptinkerbell.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package cmd

import (
"context"
"fmt"
"log"

"github.com/aws/aws-sdk-go/aws/session"
"github.com/spf13/cobra"

"github.com/aws/eks-anywhere/internal/pkg/ssm"
"github.com/aws/eks-anywhere/internal/test/cleanup"
"github.com/aws/eks-anywhere/internal/test/e2e"
"github.com/aws/eks-anywhere/pkg/dependencies"
"github.com/aws/eks-anywhere/pkg/errors"
"github.com/aws/eks-anywhere/pkg/executables"
"github.com/aws/eks-anywhere/pkg/logger"
"github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware"
)

// cleanUpTinkerbellCmd is the "tinkerbell" cobra subcommand. init attaches it to
// cleanUpInstancesCmd, and running it calls cleanUpTinkerbellTestResources with
// the command's context.
var cleanUpTinkerbellCmd = &cobra.Command{
Use: "tinkerbell",
Short: "Clean up tinkerbell e2e resources",
Long: "Deletes vms created for e2e testing on vsphere and powers off metal machines",
SilenceUsage: true,
// NOTE(review): this reuses the Nutanix-named pre-run hook, which is defined
// outside this file; confirm it is provider-agnostic.
PreRun: preRunCleanUpNutanixSetup,
RunE: func(cmd *cobra.Command, _ []string) error {
return cleanUpTinkerbellTestResources(cmd.Context())
},
}

// Flag values read by the tinkerbell cleanup command; they are bound in init.
var (
// storageBucket is the S3 bucket name where tinkerbell hardware inventory files are stored.
storageBucket string
// instanceConfig is the file path to the instance-config.yml config.
instanceConfig string
// dryRun makes the command only log what it finds, without deleting or
// powering off any resources.
dryRun bool
)

// init attaches the tinkerbell cleanup command to cleanUpInstancesCmd, declares
// its flags and marks the storage bucket and instance config flags as required.
// Any failure to mark a flag as required aborts the program.
func init() {
	cleanUpInstancesCmd.AddCommand(cleanUpTinkerbellCmd)

	flags := cleanUpTinkerbellCmd.Flags()
	flags.StringVarP(&storageBucket, storageBucketFlagName, "s", "", "S3 bucket name where tinkerbell hardware inventory files are stored")
	// The instance config flag has to be declared on this command, not on
	// runE2ECmd: MarkFlagRequired below fails for a flag the command does not
	// define.
	flags.StringVar(&instanceConfig, instanceConfigFlagName, "", "File path to the instance-config.yml config")
	flags.BoolVar(&dryRun, "dry-run", false, "Run command without deleting or powering off any resources")

	for _, name := range []string{storageBucketFlagName, instanceConfigFlagName} {
		if err := cleanUpTinkerbellCmd.MarkFlagRequired(name); err != nil {
			log.Fatalf("Error marking flag %s as required: %v", name, err)
		}
	}
}

// cleanUpTinkerbellTestResources deletes any test runner vm in vsphere and powers off all metal machines.
//
// It also removes the tinkerbell SSM instances. The three cleanup steps always
// run in sequence (SSM instances, runner VMs, metal machines); a failure in one
// does not stop the next, and all collected errors are returned as a single
// aggregate. Setup failures (AWS session, govc dependency, runner config) are
// returned immediately.
func cleanUpTinkerbellTestResources(ctx context.Context) error {
	awsSession, err := session.NewSession()
	if err != nil {
		return fmt.Errorf("creating session: %w", err)
	}

	deps, err := dependencies.NewFactory().WithGovc().Build(ctx)
	if err != nil {
		return err
	}
	defer deps.Close(ctx)

	infraConfig, err := e2e.ReadRunnerConfig(instanceConfig)
	if err != nil {
		return fmt.Errorf("reading vms config for tests: %v", err)
	}

	// Point govc at the vSphere environment described by the runner config.
	govcConfig := executables.GovcConfig{
		Username:   infraConfig.Username,
		Password:   infraConfig.Password,
		URL:        infraConfig.URL,
		Insecure:   infraConfig.Insecure,
		Datacenter: infraConfig.Datacenter,
	}
	deps.Govc.Configure(govcConfig)

	// Appending an empty result leaves the slice untouched, so it stays nil
	// when every step succeeds.
	var allErrs []error
	allErrs = append(allErrs, deleteSSMInstances(ctx, awsSession)...)
	allErrs = append(allErrs, deleteRunners(ctx, deps.Govc, infraConfig.Folder)...)
	allErrs = append(allErrs, powerOffMachines(ctx, awsSession)...)

	return errors.NewAggregate(allErrs)
}

// deleteSSMInstances lists the tinkerbell SSM instances and, unless dryRun is
// set, deregisters them and deletes their activations. In dry-run mode the
// instance and activation IDs are only logged.
//
// The two delete steps are attempted independently, so the returned slice can
// hold up to two errors. It is nil when nothing failed.
func deleteSSMInstances(ctx context.Context, session *session.Session) []error {
	found, listErr := e2e.ListTinkerbellSSMInstances(ctx, session)
	if listErr != nil {
		return []error{fmt.Errorf("listing ssm instances: %w", listErr)}
	}

	if dryRun {
		logger.Info("Found SSM instances", "instanceIDs", found.InstanceIDs, "activationIDs", found.ActivationIDs)
		return nil
	}

	var failures []error
	if _, err := ssm.DeregisterInstances(session, found.InstanceIDs...); err != nil {
		failures = append(failures, fmt.Errorf("deleting ssm instances: %w", err))
	}
	if _, err := ssm.DeleteActivations(session, found.ActivationIDs...); err != nil {
		failures = append(failures, fmt.Errorf("deleting ssm activations: %w", err))
	}

	return failures
}

// deleteRunners lists the VMs in the given vSphere folder and, unless dryRun is
// set, deletes each of them. In dry-run mode the VMs are only logged.
//
// A failed deletion does not stop the loop: every VM is attempted and one error
// per failed VM is returned, identified by the VM path. The result is nil when
// nothing failed.
func deleteRunners(ctx context.Context, govc *executables.Govc, folder string) []error {
	runners, err := govc.ListVMs(ctx, folder)
	if err != nil {
		return []error{fmt.Errorf("listing tinkerbell runners: %w", err)}
	}

	if dryRun {
		logger.Info("Found VM Runners", "vms", runners)
		return nil
	}

	var errs []error
	for _, vm := range runners {
		if err := govc.DeleteVM(ctx, vm.Path); err != nil {
			// Report the path that was passed to DeleteVM rather than
			// formatting the whole VM value with %s.
			errs = append(errs, fmt.Errorf("deleting tinkerbell runner %s: %w", vm.Path, err))
		}
	}

	return errs
}

// powerOffMachines reads the tinkerbell machine pool from the storageBucket S3
// bucket and, unless dryRun is set, powers the machines off. In dry-run mode
// only the machine hostnames are logged.
//
// It returns at most one error, or nil on success.
func powerOffMachines(_ context.Context, session *session.Session) []error {
	pool, readErr := e2e.ReadTinkerbellMachinePool(session, storageBucket)
	switch {
	case readErr != nil:
		return []error{fmt.Errorf("reading tinkerbell machine pool: %v", readErr)}
	case dryRun:
		logger.Info("Metal machine pool", "machines", names(pool))
		return nil
	}

	// NOTE(review): the second argument is presumably an "ignore errors"
	// switch; confirm against cleanup.PowerOffTinkerbellMachines.
	if err := cleanup.PowerOffTinkerbellMachines(pool, true); err != nil {
		return []error{fmt.Errorf("powering off tinkerbell machines: %v", err)}
	}

	return nil
}

// names returns the Hostname of every machine in h, preserving order. The
// result is always non-nil and has the same length as h.
func names(h []*hardware.Machine) []string {
	hostnames := make([]string, len(h))
	for i := range h {
		hostnames[i] = h[i].Hostname
	}

	return hostnames
}
37 changes: 28 additions & 9 deletions internal/pkg/ssm/activation.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package ssm
import (
"fmt"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ssm"
)
Expand All @@ -12,7 +13,14 @@ type ActivationInfo struct {
ActivationID string
}

func CreateActivation(session *session.Session, instanceName, role string) (*ActivationInfo, error) {
// Tag is an SSM tag.
type Tag struct {
// Key is the tag key.
Key string
// Value is the tag value associated with Key.
Value string
}

// CreateActivation creates an SSM Hybrid activation.
func CreateActivation(session *session.Session, instanceName, role string, tags ...Tag) (*ActivationInfo, error) {
s := ssm.New(session)

request := ssm.CreateActivationInput{
Expand All @@ -21,6 +29,12 @@ func CreateActivation(session *session.Session, instanceName, role string) (*Act
IamRole: &role,
}

for _, tag := range tags {
request.Tags = append(request.Tags,
&ssm.Tag{Key: aws.String(tag.Key), Value: aws.String(tag.Value)},
)
}

result, err := s.CreateActivation(&request)
if err != nil {
return nil, fmt.Errorf("failed to activate ssm instance %s: %v", instanceName, err)
Expand All @@ -29,17 +43,22 @@ func CreateActivation(session *session.Session, instanceName, role string) (*Act
return &ActivationInfo{ActivationCode: *result.ActivationCode, ActivationID: *result.ActivationId}, nil
}

func DeleteActivation(session *session.Session, activationId string) (*ssm.DeleteActivationOutput, error) {
// DeleteActivations deletes SSM activations.
func DeleteActivations(session *session.Session, ids ...string) ([]*ssm.DeleteActivationOutput, error) {
s := ssm.New(session)
var outputs []*ssm.DeleteActivationOutput
for _, id := range ids {
request := ssm.DeleteActivationInput{
ActivationId: &id,
}

request := ssm.DeleteActivationInput{
ActivationId: &activationId,
}
result, err := s.DeleteActivation(&request)
if err != nil {
return nil, fmt.Errorf("failed to delete ssm activation: %v", err)
}

result, err := s.DeleteActivation(&request)
if err != nil {
return nil, fmt.Errorf("failed to delete ssm activation: %v", err)
outputs = append(outputs, result)
}

return result, nil
return outputs, nil
}
41 changes: 35 additions & 6 deletions internal/pkg/ssm/instance.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package ssm

import (
"context"
"fmt"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ssm"
)
Expand Down Expand Up @@ -30,16 +32,43 @@ func GetInstanceByActivationId(session *session.Session, id string) (*ssm.Instan
return infoList[0], nil
}

func DeregisterInstance(session *session.Session, id string) (*ssm.DeregisterManagedInstanceOutput, error) {
// DeregisterInstances deregisters SSM instances.
func DeregisterInstances(session *session.Session, ids ...string) ([]*ssm.DeregisterManagedInstanceOutput, error) {
s := ssm.New(session)
input := ssm.DeregisterManagedInstanceInput{
InstanceId: &id,
var outputs []*ssm.DeregisterManagedInstanceOutput
for _, id := range ids {
input := ssm.DeregisterManagedInstanceInput{
InstanceId: &id,
}

output, err := s.DeregisterManagedInstance(&input)
if err != nil {
return nil, fmt.Errorf("failed to deregister ssm instance %s: %v", id, err)
}

outputs = append(outputs, output)
}

output, err := s.DeregisterManagedInstance(&input)
return outputs, nil
}

func ListInstancesByTags(ctx context.Context, session *session.Session, tags ...Tag) ([]*ssm.InstanceInformation, error) {
s := ssm.New(session)
input := ssm.DescribeInstanceInformationInput{
Filters: make([]*ssm.InstanceInformationStringFilter, 0, len(tags)),
}

for _, tag := range tags {
input.Filters = append(input.Filters, &ssm.InstanceInformationStringFilter{
Key: aws.String("tag:" + tag.Key),
Values: aws.StringSlice([]string{tag.Value}),
})
}

output, err := s.DescribeInstanceInformation(&input)
if err != nil {
return nil, fmt.Errorf("failed to deregister ssm instance %s: %v", id, err)
return nil, fmt.Errorf("listing ssm instances by tags: %v", err)
}

return output, nil
return output.InstanceInformationList, nil
}
89 changes: 0 additions & 89 deletions internal/test/cleanup/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,19 @@ import (
"fmt"
"os"
"strconv"
"strings"
"time"

"github.com/aws/aws-sdk-go/aws/session"
"github.com/bmc-toolbox/bmclib/v2"
"github.com/go-logr/logr"
prismgoclient "github.com/nutanix-cloud-native/prism-go-client"
v3 "github.com/nutanix-cloud-native/prism-go-client/v3"

"github.com/aws/eks-anywhere/internal/pkg/api"
"github.com/aws/eks-anywhere/internal/pkg/ec2"
"github.com/aws/eks-anywhere/internal/pkg/s3"
"github.com/aws/eks-anywhere/pkg/errors"
"github.com/aws/eks-anywhere/pkg/executables"
"github.com/aws/eks-anywhere/pkg/filewriter"
"github.com/aws/eks-anywhere/pkg/logger"
"github.com/aws/eks-anywhere/pkg/providers/cloudstack/decoder"
"github.com/aws/eks-anywhere/pkg/providers/nutanix"
"github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware"
"github.com/aws/eks-anywhere/pkg/retrier"
"github.com/aws/eks-anywhere/pkg/validations"
)
Expand Down Expand Up @@ -195,86 +189,3 @@ func NutanixTestResources(clusterName, endpoint, port string, insecure, ignoreEr
}
return nil
}

// TinkerbellTestResources cleans up machines by powering them down.
//
// It builds the hardware map from the inventory CSV at inventoryCSVFilePath and
// powers off every entry. ignoreErrors is forwarded to the power-off routine;
// a failure to read the inventory is always returned.
func TinkerbellTestResources(inventoryCSVFilePath string, ignoreErrors bool) error {
	hardwarePool, err := api.NewHardwareMapFromFile(inventoryCSVFilePath)
	if err != nil {
		return fmt.Errorf("failed to create hardware map from inventory csv: %v", err)
	}

	// logger.Info takes a message followed by key/value pairs, so the pool is
	// passed as a value instead of through a printf verb that would never be
	// interpolated.
	logger.Info("Powering off hardware", "hardware", hardwarePool)
	return powerOffHardwarePool(hardwarePool, ignoreErrors)
}

// powerOffHardwarePool powers off every machine in the pool. A failure on one
// machine does not stop the others; if any machine failed, a single error is
// returned that reports the failure count and the aggregated errors.
func powerOffHardwarePool(hardware map[string]*hardware.Machine, ignoreErrors bool) error {
	var failures []error
	for _, machine := range hardware {
		err := powerOffHardware(machine, ignoreErrors)
		if err == nil {
			continue
		}
		failures = append(failures, err)
	}

	if len(failures) == 0 {
		return nil
	}

	return fmt.Errorf("failed to power off %d hardware: %+v", len(failures), errors.NewAggregate(failures))
}

// powerOffHardware connects to the machine's BMC and sets its power state to
// "off", unless the BMC already reports an off state. The whole operation is
// bounded by a two minute timeout.
//
// Every failure is logged as a warning. It is returned to the caller only when
// ignoreErrors is false. A failure to close the BMC connection is reported
// through the named result, but never replaces an earlier error.
func powerOffHardware(h *hardware.Machine, ignoreErrors bool) (reterror error) {
	ctx, done := context.WithTimeout(context.Background(), 2*time.Minute)
	defer done()
	bmcClient := newBmclibClient(logr.Discard(), h.BMCIPAddress, h.BMCUsername, h.BMCPassword)

	// logger.Info takes a message plus key/value pairs, so details are passed
	// as pairs rather than printf verbs.
	if err := bmcClient.Open(ctx); err != nil {
		md := bmcClient.GetMetadata()
		logger.Info("Warning: Failed to open connection to BMC", "error", err, "hardware", h.BMCIPAddress, "providersAttempted", md.ProvidersAttempted, "failedProviderDetail", md.FailedProviderDetail)
		return handlePowerOffHardwareError(err, ignoreErrors)
	}

	md := bmcClient.GetMetadata()
	logger.Info("Connected to BMC", "hardware", h.BMCIPAddress, "providersAttempted", md.ProvidersAttempted, "successfulProvider", md.SuccessfulOpenConns)

	defer func() {
		if err := bmcClient.Close(ctx); err != nil {
			md := bmcClient.GetMetadata()
			logger.Info("Warning: BMC close connection failed", "error", err, "hardware", h.BMCIPAddress, "providersAttempted", md.ProvidersAttempted, "failedProviderDetail", md.FailedProviderDetail)
			// Keep an earlier, more relevant error instead of masking it
			// with the close failure.
			if reterror == nil {
				reterror = handlePowerOffHardwareError(err, ignoreErrors)
			}
		}
	}()

	// Best effort: if the power state cannot be read, fall through and try to
	// power the machine off anyway.
	state, err := bmcClient.GetPowerState(ctx)
	if err != nil {
		state = "unknown"
	}
	if strings.Contains(strings.ToLower(state), "off") {
		return nil
	}

	if _, err := bmcClient.SetPowerState(ctx, "off"); err != nil {
		md := bmcClient.GetMetadata()
		logger.Info("Warning: failed to power off hardware", "error", err, "hardware", h.BMCIPAddress, "providersAttempted", md.ProvidersAttempted, "failedProviderDetail", md.FailedProviderDetail)
		return handlePowerOffHardwareError(err, ignoreErrors)
	}

	return nil
}

// handlePowerOffHardwareError applies the ignoreErrors policy to err: it returns
// nil when errors are ignored and err unchanged (possibly nil) otherwise.
func handlePowerOffHardwareError(err error, ignoreErrors bool) error {
	if ignoreErrors {
		return nil
	}
	return err
}

// newBmclibClient creates a new BMClib client.
//
// The client logs through log, annotated with the host and username, and its
// driver registry is reordered to prefer the "redfish" protocol.
func newBmclibClient(log logr.Logger, hostIP, username, password string) *bmclib.Client {
	scoped := log.WithValues("host", hostIP, "username", username)
	client := bmclib.NewClient(hostIP, username, password, bmclib.WithLogger(scoped))
	client.Registry.Drivers = client.Registry.PreferProtocol("redfish")

	return client
}
Loading

0 comments on commit fb2aae5

Please sign in to comment.