diff --git a/Makefile b/Makefile index 820c8b6720ea..fed0f13f542a 100644 --- a/Makefile +++ b/Makefile @@ -683,8 +683,10 @@ e2e-tests-binary: GOOS=$(GO_OS) GOARCH=$(GO_ARCH) $(GO) test ./test/e2e -c -o "$(E2E_OUTPUT_FILE)" -tags "$(E2E_TAGS)" -ldflags "-X github.com/aws/eks-anywhere/pkg/version.gitVersion=$(DEV_GIT_VERSION) -X github.com/aws/eks-anywhere/pkg/manifests/releases.manifestURL=$(RELEASE_MANIFEST_URL)" .PHONY: build-integration-test-binary +build-integration-test-binary: ALL_LINKER_FLAGS := $(LINKER_FLAGS) -X github.com/aws/eks-anywhere/pkg/version.gitVersion=$(DEV_GIT_VERSION) -X github.com/aws/eks-anywhere/pkg/manifests/releases.manifestURL=$(RELEASE_MANIFEST_URL) -s -w -buildid='' -extldflags -static +build-integration-test-binary: LINKER_FLAGS_ARG := -ldflags "$(ALL_LINKER_FLAGS)" build-integration-test-binary: - GOOS=$(GO_OS) GOARCH=$(GO_ARCH) $(GO) build -o bin/test github.com/aws/eks-anywhere/cmd/integration_test + GOOS=$(GO_OS) GOARCH=$(GO_ARCH) $(GO) build $(LINKER_FLAGS_ARG) -o bin/test github.com/aws/eks-anywhere/cmd/integration_test .PHONY: conformance conformance: diff --git a/cmd/integration_test/build/buildspecs/quick-test-eks-a-cli.yml b/cmd/integration_test/build/buildspecs/quick-test-eks-a-cli.yml index 46776a199820..721c6a9e6b93 100644 --- a/cmd/integration_test/build/buildspecs/quick-test-eks-a-cli.yml +++ b/cmd/integration_test/build/buildspecs/quick-test-eks-a-cli.yml @@ -203,6 +203,12 @@ phases: --insecure --ignoreErrors -v 4 + - > + ./bin/test e2e cleanup tinkerbell + --storage-bucket ${INTEGRATION_TEST_STORAGE_BUCKET} + --instance-config ${INTEGRATION_TEST_INFRA_CONFIG} + --dry-run + -v 4 build: commands: - export JOB_ID=$CODEBUILD_BUILD_ID diff --git a/cmd/integration_test/cmd/cleanuptinkerbell.go b/cmd/integration_test/cmd/cleanuptinkerbell.go new file mode 100644 index 000000000000..817065b7e07c --- /dev/null +++ b/cmd/integration_test/cmd/cleanuptinkerbell.go @@ -0,0 +1,156 @@ +package cmd + +import ( + "context" + "fmt" + "log" + + "github.com/aws/aws-sdk-go/aws/session" + "github.com/spf13/cobra" + + "github.com/aws/eks-anywhere/internal/pkg/ssm" + "github.com/aws/eks-anywhere/internal/test/cleanup" + "github.com/aws/eks-anywhere/internal/test/e2e" + "github.com/aws/eks-anywhere/pkg/dependencies" + "github.com/aws/eks-anywhere/pkg/errors" + "github.com/aws/eks-anywhere/pkg/executables" + "github.com/aws/eks-anywhere/pkg/logger" + "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware" +) + +var cleanUpTinkerbellCmd = &cobra.Command{ + Use: "tinkerbell", + Short: "Clean up tinkerbell e2e resources", + Long: "Deletes vms created for e2e testing on vsphere and powers off metal machines", + SilenceUsage: true, + PreRun: preRunCleanUpNutanixSetup, + RunE: func(cmd *cobra.Command, _ []string) error { + return cleanUpTinkerbellTestResources(cmd.Context()) + }, +} + +var ( + storageBucket string + instanceConfig string + dryRun bool +) + +func init() { + cleanUpInstancesCmd.AddCommand(cleanUpTinkerbellCmd) + cleanUpTinkerbellCmd.Flags().StringVarP(&storageBucket, storageBucketFlagName, "s", "", "S3 bucket name where tinkerbell hardware inventory files are stored") + cleanUpTinkerbellCmd.Flags().StringVar(&instanceConfig, instanceConfigFlagName, "", "File path to the instance-config.yml config") + cleanUpTinkerbellCmd.Flags().BoolVar(&dryRun, "dry-run", false, "Run command without deleting or powering off any resources") + + if err := cleanUpTinkerbellCmd.MarkFlagRequired(storageBucketFlagName); err != nil { + log.Fatalf("Error marking flag %s as 
required: %v", storageBucketFlagName, err) + } + + if err := cleanUpTinkerbellCmd.MarkFlagRequired(instanceConfigFlagName); err != nil { + log.Fatalf("Error marking flag %s as required: %v", instanceConfigFlagName, err) + } +} + +// cleanUpTinkerbellTestResources deletes any test runner vm in vsphere and powers off all metal machines. +func cleanUpTinkerbellTestResources(ctx context.Context) error { + session, err := session.NewSession() + if err != nil { + return fmt.Errorf("creating session: %w", err) + } + + deps, err := dependencies.NewFactory().WithGovc().Build(ctx) + if err != nil { + return err + } + defer deps.Close(ctx) + govc := deps.Govc + + infraConfig, err := e2e.ReadRunnerConfig(instanceConfig) + if err != nil { + return fmt.Errorf("reading vms config for tests: %v", err) + } + + govc.Configure( + executables.GovcConfig{ + Username: infraConfig.Username, + Password: infraConfig.Password, + URL: infraConfig.URL, + Insecure: infraConfig.Insecure, + Datacenter: infraConfig.Datacenter, + }, + ) + + var errs []error + + if err := deleteSSMInstances(ctx, session); len(err) != 0 { + errs = append(errs, err...) + } + + if err := deleteRunners(ctx, govc, infraConfig.Folder); len(err) != 0 { + errs = append(errs, err...) + } + + if err := powerOffMachines(ctx, session); len(err) != 0 { + errs = append(errs, err...) + } + + return errors.NewAggregate(errs) +} + +func deleteSSMInstances(ctx context.Context, session *session.Session) []error { + var errs []error + if ssmInstances, err := e2e.ListTinkerbellSSMInstances(ctx, session); err != nil { + errs = append(errs, fmt.Errorf("listing ssm instances: %w", err)) + } else if dryRun { + logger.Info("Found SSM instances", "instanceIDs", ssmInstances.InstanceIDs, "activationIDs", ssmInstances.ActivationIDs) + } else { + if _, err := ssm.DeregisterInstances(session, ssmInstances.InstanceIDs...); err != nil { + errs = append(errs, fmt.Errorf("deleting ssm instances: %w", err)) + } + if _, err := ssm.DeleteActivations(session, ssmInstances.ActivationIDs...); err != nil { + errs = append(errs, fmt.Errorf("deleting ssm activations: %w", err)) + } + } + + return errs +} + +func deleteRunners(ctx context.Context, govc *executables.Govc, folder string) []error { + var errs []error + if runners, err := govc.ListVMs(ctx, folder); err != nil { + errs = append(errs, fmt.Errorf("listing tinkerbell runners: %w", err)) + } else if dryRun { + logger.Info("Found VM Runners", "vms", runners) + } else { + for _, vm := range runners { + if err := govc.DeleteVM(ctx, vm.Path); err != nil { + errs = append(errs, fmt.Errorf("deleting tinkerbell runner %s: %w", vm, err)) + } + } + } + + return errs +} + +func powerOffMachines(_ context.Context, session *session.Session) []error { + var errs []error + if machines, err := e2e.ReadTinkerbellMachinePool(session, storageBucket); err != nil { + errs = append(errs, fmt.Errorf("reading tinkerbell machine pool: %v", err)) + } else if dryRun { + logger.Info("Metal machine pool", "machines", names(machines)) + } else { + if err = cleanup.PowerOffTinkerbellMachines(machines, true); err != nil { + errs = append(errs, fmt.Errorf("powering off tinkerbell machines: %v", err)) + } + } + + return errs +} + +func names(h []*hardware.Machine) []string { + names := make([]string, 0, len(h)) + for _, m := range h { + names = append(names, m.Hostname) + } + + return names +} diff --git a/internal/pkg/api/hardware.go b/internal/pkg/api/hardware.go index 683f0f415352..242020147bee 100644 --- a/internal/pkg/api/hardware.go +++ 
b/internal/pkg/api/hardware.go @@ -25,7 +25,7 @@ const ( // Alias for backwards compatibility. type Hardware = hardware.Machine -func NewHardwareSlice(r io.Reader) ([]*Hardware, error) { +func ReadTinkerbellHardware(r io.Reader) ([]*Hardware, error) { hardware := []*Hardware{} if err := gocsv.Unmarshal(r, &hardware); err != nil { @@ -35,16 +35,16 @@ func NewHardwareSlice(r io.Reader) ([]*Hardware, error) { return hardware, nil } -func NewHardwareSliceFromFile(file string) ([]*Hardware, error) { +func ReadTinkerbellHardwareFromFile(file string) ([]*Hardware, error) { hardwareFile, err := os.OpenFile(file, os.O_RDONLY, os.ModePerm) if err != nil { return nil, fmt.Errorf("failed to create hardware slice from hardware file: %v", err) } - return NewHardwareSlice(hardwareFile) + return ReadTinkerbellHardware(hardwareFile) } func NewHardwareMapFromFile(file string) (map[string]*Hardware, error) { - slice, err := NewHardwareSliceFromFile(file) + slice, err := ReadTinkerbellHardwareFromFile(file) if err != nil { return nil, fmt.Errorf("failed to create hardware map from hardware file: %v", err) } diff --git a/internal/pkg/ssm/activation.go b/internal/pkg/ssm/activation.go index b92ca48d4707..2d1f0089d0ea 100644 --- a/internal/pkg/ssm/activation.go +++ b/internal/pkg/ssm/activation.go @@ -3,6 +3,7 @@ package ssm import ( "fmt" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ssm" ) @@ -12,7 +13,14 @@ type ActivationInfo struct { ActivationID string } -func CreateActivation(session *session.Session, instanceName, role string) (*ActivationInfo, error) { +// Tag is an SSM tag. +type Tag struct { + Key string + Value string +} + +// CreateActivation creates an SSM Hybrid activation. +func CreateActivation(session *session.Session, instanceName, role string, tags ...Tag) (*ActivationInfo, error) { s := ssm.New(session) request := ssm.CreateActivationInput{ @@ -21,6 +29,12 @@ func CreateActivation(session *session.Session, instanceName, role string) (*Act IamRole: &role, } + for _, tag := range tags { + request.Tags = append(request.Tags, + &ssm.Tag{Key: aws.String(tag.Key), Value: aws.String(tag.Value)}, + ) + } + result, err := s.CreateActivation(&request) if err != nil { return nil, fmt.Errorf("failed to activate ssm instance %s: %v", instanceName, err) @@ -29,17 +43,22 @@ func CreateActivation(session *session.Session, instanceName, role string) (*Act return &ActivationInfo{ActivationCode: *result.ActivationCode, ActivationID: *result.ActivationId}, nil } -func DeleteActivation(session *session.Session, activationId string) (*ssm.DeleteActivationOutput, error) { +// DeleteActivations deletes SSM activations. 
+func DeleteActivations(session *session.Session, ids ...string) ([]*ssm.DeleteActivationOutput, error) { s := ssm.New(session) + var outputs []*ssm.DeleteActivationOutput + for _, id := range ids { + request := ssm.DeleteActivationInput{ + ActivationId: &id, + } - request := ssm.DeleteActivationInput{ - ActivationId: &activationId, - } + result, err := s.DeleteActivation(&request) + if err != nil { + return nil, fmt.Errorf("failed to delete ssm activation: %v", err) + } - result, err := s.DeleteActivation(&request) - if err != nil { - return nil, fmt.Errorf("failed to delete ssm activation: %v", err) + outputs = append(outputs, result) } - return result, nil + return outputs, nil } diff --git a/internal/pkg/ssm/instance.go b/internal/pkg/ssm/instance.go index 6fd2b4b80ae2..d04d90a071b3 100644 --- a/internal/pkg/ssm/instance.go +++ b/internal/pkg/ssm/instance.go @@ -1,8 +1,10 @@ package ssm import ( + "context" "fmt" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/ssm" ) @@ -30,16 +32,43 @@ func GetInstanceByActivationId(session *session.Session, id string) (*ssm.Instan return infoList[0], nil } -func DeregisterInstance(session *session.Session, id string) (*ssm.DeregisterManagedInstanceOutput, error) { +// DeregisterInstances deregisters SSM instances. +func DeregisterInstances(session *session.Session, ids ...string) ([]*ssm.DeregisterManagedInstanceOutput, error) { s := ssm.New(session) - input := ssm.DeregisterManagedInstanceInput{ - InstanceId: &id, + var outputs []*ssm.DeregisterManagedInstanceOutput + for _, id := range ids { + input := ssm.DeregisterManagedInstanceInput{ + InstanceId: &id, + } + + output, err := s.DeregisterManagedInstance(&input) + if err != nil { + return nil, fmt.Errorf("failed to deregister ssm instance %s: %v", id, err) + } + + outputs = append(outputs, output) } - output, err := s.DeregisterManagedInstance(&input) + return outputs, nil +} + +func ListInstancesByTags(ctx context.Context, session *session.Session, tags ...Tag) ([]*ssm.InstanceInformation, error) { + s := ssm.New(session) + input := ssm.DescribeInstanceInformationInput{ + Filters: make([]*ssm.InstanceInformationStringFilter, 0, len(tags)), + } + + for _, tag := range tags { + input.Filters = append(input.Filters, &ssm.InstanceInformationStringFilter{ + Key: aws.String("tag:" + tag.Key), + Values: aws.StringSlice([]string{tag.Value}), + }) + } + + output, err := s.DescribeInstanceInformation(&input) if err != nil { - return nil, fmt.Errorf("failed to deregister ssm instance %s: %v", id, err) + return nil, fmt.Errorf("listing ssm instances by tags: %v", err) } - return output, nil + return output.InstanceInformationList, nil } diff --git a/internal/test/cleanup/cleanup.go b/internal/test/cleanup/cleanup.go index 67c7ec16e334..50a2e0dd6792 100644 --- a/internal/test/cleanup/cleanup.go +++ b/internal/test/cleanup/cleanup.go @@ -5,25 +5,19 @@ import ( "fmt" "os" "strconv" - "strings" "time" "github.com/aws/aws-sdk-go/aws/session" - "github.com/bmc-toolbox/bmclib/v2" - "github.com/go-logr/logr" prismgoclient "github.com/nutanix-cloud-native/prism-go-client" v3 "github.com/nutanix-cloud-native/prism-go-client/v3" - "github.com/aws/eks-anywhere/internal/pkg/api" "github.com/aws/eks-anywhere/internal/pkg/ec2" "github.com/aws/eks-anywhere/internal/pkg/s3" - "github.com/aws/eks-anywhere/pkg/errors" "github.com/aws/eks-anywhere/pkg/executables" "github.com/aws/eks-anywhere/pkg/filewriter" "github.com/aws/eks-anywhere/pkg/logger" 
"github.com/aws/eks-anywhere/pkg/providers/cloudstack/decoder" "github.com/aws/eks-anywhere/pkg/providers/nutanix" - "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware" "github.com/aws/eks-anywhere/pkg/retrier" "github.com/aws/eks-anywhere/pkg/validations" ) @@ -90,7 +84,7 @@ func CleanUpVsphereTestResources(ctx context.Context, clusterName string) error } func VsphereRmVms(ctx context.Context, clusterName string, opts ...executables.GovcOpt) error { - logger.V(1).Info("Deleting vsphere vcenter vms") + logger.V(1).Info("Deleting vsphere vcenter vms", "clusterName", clusterName) executableBuilder, close, err := executables.InitInDockerExecutablesBuilder(ctx, executables.DefaultEksaImage()) if err != nil { return fmt.Errorf("unable to initialize executables: %v", err) @@ -195,86 +189,3 @@ func NutanixTestResources(clusterName, endpoint, port string, insecure, ignoreEr } return nil } - -// TinkerbellTestResources cleans up machines by powering them down. -func TinkerbellTestResources(inventoryCSVFilePath string, ignoreErrors bool) error { - hardwarePool, err := api.NewHardwareMapFromFile(inventoryCSVFilePath) - if err != nil { - return fmt.Errorf("failed to create hardware map from inventory csv: %v", err) - } - - logger.Info("Powering off hardware: %+v", hardwarePool) - return powerOffHardwarePool(hardwarePool, ignoreErrors) -} - -func powerOffHardwarePool(hardware map[string]*hardware.Machine, ignoreErrors bool) error { - errList := []error{} - for _, h := range hardware { - if err := powerOffHardware(h, ignoreErrors); err != nil { - errList = append(errList, err) - } - } - - if len(errList) > 0 { - return fmt.Errorf("failed to power off %d hardware: %+v", len(errList), errors.NewAggregate(errList)) - } - - return nil -} - -func powerOffHardware(h *hardware.Machine, ignoreErrors bool) (reterror error) { - ctx, done := context.WithTimeout(context.Background(), 2*time.Minute) - defer done() - bmcClient := newBmclibClient(logr.Discard(), h.BMCIPAddress, h.BMCUsername, h.BMCPassword) - - if err := bmcClient.Open(ctx); err != nil { - md := bmcClient.GetMetadata() - logger.Info("Warning: Failed to open connection to BMC: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) - return handlePowerOffHardwareError(err, ignoreErrors) - } - - md := bmcClient.GetMetadata() - logger.Info("Connected to BMC: hardware: %v, providersAttempted: %v, successfulProvider: %v", h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) - - defer func() { - if err := bmcClient.Close(ctx); err != nil { - md := bmcClient.GetMetadata() - logger.Info("Warning: BMC close connection failed: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.FailedProviderDetail) - reterror = handlePowerOffHardwareError(err, ignoreErrors) - } - }() - - state, err := bmcClient.GetPowerState(ctx) - if err != nil { - state = "unknown" - } - if strings.Contains(strings.ToLower(state), "off") { - return nil - } - - if _, err := bmcClient.SetPowerState(ctx, "off"); err != nil { - md := bmcClient.GetMetadata() - logger.Info("Warning: failed to power off hardware: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) - return handlePowerOffHardwareError(err, ignoreErrors) - } - - return nil -} - -func handlePowerOffHardwareError(err error, ignoreErrors bool) error { - if err != nil && !ignoreErrors { - 
return err - } - return nil -} - -// newBmclibClient creates a new BMClib client. -func newBmclibClient(log logr.Logger, hostIP, username, password string) *bmclib.Client { - o := []bmclib.Option{} - log = log.WithValues("host", hostIP, "username", username) - o = append(o, bmclib.WithLogger(log)) - client := bmclib.NewClient(hostIP, username, password, o...) - client.Registry.Drivers = client.Registry.PreferProtocol("redfish") - - return client -} diff --git a/internal/test/cleanup/tinkerbell.go b/internal/test/cleanup/tinkerbell.go new file mode 100644 index 000000000000..c51807e61d9a --- /dev/null +++ b/internal/test/cleanup/tinkerbell.go @@ -0,0 +1,100 @@ +package cleanup + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/bmc-toolbox/bmclib/v2" + "github.com/go-logr/logr" + + "github.com/aws/eks-anywhere/internal/pkg/api" + "github.com/aws/eks-anywhere/pkg/errors" + "github.com/aws/eks-anywhere/pkg/logger" + "github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware" +) + +// PowerOffTinkerbellMachinesFromFile cleans up machines by powering them down. +func PowerOffTinkerbellMachinesFromFile(inventoryCSVFilePath string, ignoreErrors bool) error { + hardwarePool, err := api.ReadTinkerbellHardwareFromFile(inventoryCSVFilePath) + if err != nil { + return fmt.Errorf("failed to create hardware map from inventory csv: %v", err) + } + + logger.Info("Powering off hardware: %+v", hardwarePool) + return PowerOffTinkerbellMachines(hardwarePool, ignoreErrors) +} + +// PowerOffTinkerbellMachines powers off machines. +func PowerOffTinkerbellMachines(hardware []*hardware.Machine, ignoreErrors bool) error { + errList := []error{} + for _, h := range hardware { + if err := powerOffTinkerbellMachine(h, ignoreErrors); err != nil { + errList = append(errList, err) + } + } + + if len(errList) > 0 { + return fmt.Errorf("failed to power off %d hardware: %+v", len(errList), errors.NewAggregate(errList)) + } + + return nil +} + +func powerOffTinkerbellMachine(h *hardware.Machine, ignoreErrors bool) (reterror error) { + ctx, done := context.WithTimeout(context.Background(), 2*time.Minute) + defer done() + bmcClient := newBmclibClient(logr.Discard(), h.BMCIPAddress, h.BMCUsername, h.BMCPassword) + + if err := bmcClient.Open(ctx); err != nil { + md := bmcClient.GetMetadata() + logger.Info("Warning: Failed to open connection to BMC: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) + return handlePowerOffHardwareError(err, ignoreErrors) + } + + md := bmcClient.GetMetadata() + logger.Info("Connected to BMC: hardware: %v, providersAttempted: %v, successfulProvider: %v", h.BMCIPAddress, md.ProvidersAttempted, md.SuccessfulOpenConns) + + defer func() { + if err := bmcClient.Close(ctx); err != nil { + md := bmcClient.GetMetadata() + logger.Info("Warning: BMC close connection failed: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, md.FailedProviderDetail) + reterror = handlePowerOffHardwareError(err, ignoreErrors) + } + }() + + state, err := bmcClient.GetPowerState(ctx) + if err != nil { + state = "unknown" + } + if strings.Contains(strings.ToLower(state), "off") { + return nil + } + + if _, err := bmcClient.SetPowerState(ctx, "off"); err != nil { + md := bmcClient.GetMetadata() + logger.Info("Warning: failed to power off hardware: %v, hardware: %v, providersAttempted: %v, failedProviderDetail: %v", err, h.BMCIPAddress, md.ProvidersAttempted, 
md.SuccessfulOpenConns) + return handlePowerOffHardwareError(err, ignoreErrors) + } + + return nil +} + +func handlePowerOffHardwareError(err error, ignoreErrors bool) error { + if err != nil && !ignoreErrors { + return err + } + return nil +} + +// newBmclibClient creates a new BMClib client. +func newBmclibClient(log logr.Logger, hostIP, username, password string) *bmclib.Client { + o := []bmclib.Option{} + log = log.WithValues("host", hostIP, "username", username) + o = append(o, bmclib.WithLogger(log)) + client := bmclib.NewClient(hostIP, username, password, o...) + client.Registry.Drivers = client.Registry.PreferProtocol("redfish") + + return client +} diff --git a/internal/test/e2e/run.go b/internal/test/e2e/run.go index 1f7a550e9987..2d64b55f3f18 100644 --- a/internal/test/e2e/run.go +++ b/internal/test/e2e/run.go @@ -13,7 +13,6 @@ import ( "github.com/go-logr/logr" "github.com/aws/eks-anywhere/internal/pkg/api" - "github.com/aws/eks-anywhere/internal/pkg/s3" "github.com/aws/eks-anywhere/internal/pkg/ssm" "github.com/aws/eks-anywhere/pkg/networkutils" e2etest "github.com/aws/eks-anywhere/test/e2e" @@ -52,12 +51,38 @@ type ParallelRunConf struct { BranchName string BaremetalBranchName string Logger logr.Logger + + infraConfig *TestInfraConfig + session *session.Session +} + +// isBaremetal checks is the test run is for Baremetal tests. +func (c ParallelRunConf) isBaremetal() bool { + return strings.EqualFold(c.BranchName, c.BaremetalBranchName) +} + +// init initializes ParallelRunConf. It needs to be called before the config is used. +// This is not thread safe. +func (c *ParallelRunConf) init() error { + infraConfig, err := NewTestRunnerConfigFromFile(c.Logger, c.TestInstanceConfigFile) + if err != nil { + return fmt.Errorf("creating test runner config for tests: %v", err) + } + c.infraConfig = infraConfig + + awsSession, err := session.NewSession() + if err != nil { + return fmt.Errorf("creating aws session for tests: %v", err) + } + c.session = awsSession + + return nil } type ( testCommandResult = ssm.RunOutput instanceTestsResults struct { - conf instanceRunConf + conf *instanceRunConf testCommandResult *testCommandResult err error } @@ -65,6 +90,10 @@ type ( // RunTestsInParallel Run Tests in parallel by spawning multiple admin machines. 
func RunTestsInParallel(conf ParallelRunConf) error { + if err := conf.init(); err != nil { + return err + } + testsList, skippedTests, err := listTests(conf.Regex, conf.TestsToSkip) if err != nil { return err @@ -87,26 +116,7 @@ func RunTestsInParallel(conf ParallelRunConf) error { logTestGroups(conf.Logger, instancesConf) - // For Tinkerbell tests, get hardware inventory pool - var invCatalogue map[string]*hardwareCatalogue - if strings.EqualFold(conf.BranchName, conf.BaremetalBranchName) { - nonAirgappedHardwareInv, err := getNonAirgappedHardwarePool(conf.StorageBucket) - if err != nil { - return fmt.Errorf("failed to get non-airgapped hardware inventory for Tinkerbell Tests: %v", err) - } - nonAirgappedInvCatalogue := newHardwareCatalogue(nonAirgappedHardwareInv) - airgappedHardwareInv, err := getAirgappedHardwarePool(conf.StorageBucket) - if err != nil { - return fmt.Errorf("failed to get airgapped hardware inventory for Tinkerbell Tests: %v", err) - } - airgappedInvCatalogue := newHardwareCatalogue(airgappedHardwareInv) - invCatalogue = map[string]*hardwareCatalogue{ - nonAirgappedHardware: nonAirgappedInvCatalogue, - airgappedHardware: airgappedInvCatalogue, - } - } - - work := make(chan instanceRunConf) + work := make(chan *instanceRunConf) results := make(chan instanceTestsResults) go func() { for _, instanceConf := range instancesConf { @@ -125,7 +135,7 @@ func RunTestsInParallel(conf ParallelRunConf) error { for c := range work { r := instanceTestsResults{conf: c} - r.conf.InstanceID, r.testCommandResult, err = RunTests(c, invCatalogue) + r.conf.InstanceID, r.testCommandResult, err = RunTests(c) if err != nil { r.err = err } @@ -203,28 +213,52 @@ type instanceRunConf struct { CleanupResources bool Logger logr.Logger Session *session.Session + + // hardwareCatalogue holds the hardware inventory for Tinkerbell tests. + // Before running the tests for this instance, the hardware needs to be + // reserved through this inventory and set in the Hardware field. + // After the tests are run, the hardware needs to be released. + hardwareCatalogue *hardwareCatalogue +} + +// reserveHardware allocates the required hardware for the test run. +// If hardware is not available, it will block until it is available. +func (c *instanceRunConf) reserveHardware() error { + if c.HardwareCount == 0 { + return nil + } + + reservedTinkerbellHardware, err := c.hardwareCatalogue.reserveHardware(c.HardwareCount) + if err != nil { + return fmt.Errorf("timed out waiting for hardware") + } + c.Hardware = reservedTinkerbellHardware + logTinkerbellTestHardwareInfo(c, "Reserved") + + return nil +} + +// releaseHardware de-allocates the hardware reserved for this test run. +// This should be called after the test run is complete. +func (c *instanceRunConf) releaseHardware() { + if c.HardwareCount == 0 { + return + } + logTinkerbellTestHardwareInfo(c, "Releasing") + c.hardwareCatalogue.releaseHardware(c.Hardware) } //nolint:gocyclo, revive // RunTests responsible launching test runner to run tests is complex. 
-func RunTests(conf instanceRunConf, inventoryCatalogue map[string]*hardwareCatalogue) (testInstanceID string, testCommandResult *testCommandResult, err error) { +func RunTests(conf *instanceRunConf) (testInstanceID string, testCommandResult *testCommandResult, err error) { testRunner, err := newTestRunner(conf.TestRunnerType, conf.TestRunnerConfig) if err != nil { return "", nil, err } - if conf.HardwareCount > 0 { - var hardwareCatalogue *hardwareCatalogue - if conf.TinkerbellAirgappedTest { - hardwareCatalogue = inventoryCatalogue[airgappedHardware] - } else { - hardwareCatalogue = inventoryCatalogue[nonAirgappedHardware] - } - err = reserveTinkerbellHardware(&conf, hardwareCatalogue) - if err != nil { - return "", nil, err - } - // Release hardware back to inventory for Tinkerbell Tests - defer releaseTinkerbellHardware(&conf, hardwareCatalogue) + + if err = conf.reserveHardware(); err != nil { + return "", nil, err } + defer conf.releaseHardware() conf.Logger.Info("Creating runner instance", "instance_profile_name", conf.InstanceProfileName, "storage_bucket", conf.StorageBucket, @@ -341,7 +375,7 @@ func (e *E2ESession) commandWithEnvVars(command string) string { return strings.Join(fullCommand, "; ") } -func splitTests(testsList []string, conf ParallelRunConf) ([]instanceRunConf, error) { +func splitTests(testsList []string, conf ParallelRunConf) ([]*instanceRunConf, error) { testPerInstance := len(testsList) / conf.MaxInstances if testPerInstance == 0 { testPerInstance = 1 @@ -354,22 +388,12 @@ func splitTests(testsList []string, conf ParallelRunConf) ([]instanceRunConf, er privateNetworkTestsRe := regexp.MustCompile(`^.*(Proxy|RegistryMirror).*$`) multiClusterTestsRe := regexp.MustCompile(`^.*Multicluster.*$`) - runConfs := make([]instanceRunConf, 0, conf.MaxInstances) + runConfs := make([]*instanceRunConf, 0, conf.MaxInstances) vsphereIPMan := newE2EIPManager(conf.Logger, os.Getenv(vsphereCidrVar)) vspherePrivateIPMan := newE2EIPManager(conf.Logger, os.Getenv(vspherePrivateNetworkCidrVar)) cloudstackIPMan := newE2EIPManager(conf.Logger, os.Getenv(cloudstackCidrVar)) nutanixIPMan := newE2EIPManager(conf.Logger, os.Getenv(nutanixCidrVar)) - awsSession, err := session.NewSession() - if err != nil { - return nil, fmt.Errorf("creating aws session for tests: %v", err) - } - - testRunnerConfig, err := NewTestRunnerConfigFromFile(conf.Logger, conf.TestInstanceConfigFile) - if err != nil { - return nil, fmt.Errorf("creating test runner config for tests: %v", err) - } - testsInEC2Instance := make([]string, 0, testPerInstance) for i, testName := range testsList { if tinkerbellTestsRe.MatchString(testName) { @@ -405,29 +429,43 @@ func splitTests(testsList []string, conf ParallelRunConf) ([]instanceRunConf, er } if len(testsInEC2Instance) == testPerInstance || (len(testsList)-1) == i { - runConfs = append(runConfs, newInstanceRunConf(awsSession, conf, len(runConfs), strings.Join(testsInEC2Instance, "|"), ips, []*api.Hardware{}, 0, false, Ec2TestRunnerType, testRunnerConfig)) + runConfs = append(runConfs, newInstanceRunConf(conf.session, conf, len(runConfs), strings.Join(testsInEC2Instance, "|"), ips, []*api.Hardware{}, 0, false, Ec2TestRunnerType, conf.infraConfig)) testsInEC2Instance = make([]string, 0, testPerInstance) } } - if strings.EqualFold(conf.BranchName, conf.BaremetalBranchName) { - tinkerbellIPManager := newE2EIPManager(conf.Logger, os.Getenv(tinkerbellControlPlaneNetworkCidrEnvVar)) - runConfs, err = appendNonAirgappedTinkerbellRunConfs(awsSession, testsList, conf, 
testRunnerConfig, runConfs, tinkerbellIPManager) + if conf.isBaremetal() { + confs, err := testConfigurationsForTinkerbell(testsList, conf) if err != nil { - return nil, fmt.Errorf("failed to split Tinkerbell tests: %v", err) + return nil, err } + runConfs = append(runConfs, confs...) + } - runConfs, err = appendAirgappedTinkerbellRunConfs(awsSession, testsList, conf, testRunnerConfig, runConfs, tinkerbellIPManager) - if err != nil { - return nil, fmt.Errorf("failed to run airgapped Tinkerbell tests: %v", err) - } + return runConfs, nil +} + +func testConfigurationsForTinkerbell(testsList []string, conf ParallelRunConf) ([]*instanceRunConf, error) { + runConfs := []*instanceRunConf{} + + tinkerbellIPManager := newE2EIPManager(conf.Logger, os.Getenv(tinkerbellControlPlaneNetworkCidrEnvVar)) + confs, err := nonAirgappedTinkerbellRunConfs(testsList, conf, tinkerbellIPManager) + if err != nil { + return nil, fmt.Errorf("failed to split Tinkerbell tests: %v", err) + } + runConfs = append(runConfs, confs...) + + confs, err = airgappedTinkerbellRunConfs(testsList, conf, tinkerbellIPManager) + if err != nil { + return nil, fmt.Errorf("failed to run airgapped Tinkerbell tests: %v", err) } + runConfs = append(runConfs, confs...) return runConfs, nil } //nolint:gocyclo // This legacy function is complex but the team too busy to simplify it -func appendNonAirgappedTinkerbellRunConfs(awsSession *session.Session, testsList []string, conf ParallelRunConf, testRunnerConfig *TestInfraConfig, runConfs []instanceRunConf, ipManager *E2EIPManager) ([]instanceRunConf, error) { +func nonAirgappedTinkerbellRunConfs(testsList []string, conf ParallelRunConf, ipManager *E2EIPManager) ([]*instanceRunConf, error) { nonAirgappedTinkerbellTests := getTinkerbellNonAirgappedTests(testsList) conf.Logger.V(1).Info("INFO:", "tinkerbellTests", len(nonAirgappedTinkerbellTests)) @@ -440,11 +478,20 @@ func appendNonAirgappedTinkerbellRunConfs(awsSession *session.Session, testsList if err != nil { return nil, err } + hardware, err := nonAirgappedHardwarePool(conf.session, conf.StorageBucket) + if err != nil { + return nil, fmt.Errorf("failed to get non-airgapped hardware inventory for Tinkerbell Tests: %v", err) + } + catalogue := newHardwareCatalogue(hardware) + + runConfs := make([]*instanceRunConf, 0, len(nonAirgappedTinkerbellTestsWithCount)) for i, test := range nonAirgappedTinkerbellTestsWithCount { testsInVSphereInstance = append(testsInVSphereInstance, test.Name) ipPool := ipManager.reserveIPPool(tinkerbellIPPoolSize) if len(testsInVSphereInstance) == testPerInstance || (len(testsList)-1) == i { - runConfs = append(runConfs, newInstanceRunConf(awsSession, conf, len(runConfs), strings.Join(testsInVSphereInstance, "|"), ipPool, []*api.Hardware{}, test.Count, false, VSphereTestRunnerType, testRunnerConfig)) + c := newInstanceRunConf(conf.session, conf, len(runConfs), strings.Join(testsInVSphereInstance, "|"), ipPool, []*api.Hardware{}, test.Count, false, VSphereTestRunnerType, conf.infraConfig) + c.hardwareCatalogue = catalogue + runConfs = append(runConfs, c) testsInVSphereInstance = make([]string, 0, testPerInstance) } } @@ -452,11 +499,11 @@ func appendNonAirgappedTinkerbellRunConfs(awsSession *session.Session, testsList return runConfs, nil } -func appendAirgappedTinkerbellRunConfs(awsSession *session.Session, testsList []string, conf ParallelRunConf, testRunnerConfig *TestInfraConfig, runConfs []instanceRunConf, ipManager *E2EIPManager) ([]instanceRunConf, error) { +func airgappedTinkerbellRunConfs(testsList []string, 
conf ParallelRunConf, ipManager *E2EIPManager) ([]*instanceRunConf, error) { airgappedTinkerbellTests := getTinkerbellAirgappedTests(testsList) if len(airgappedTinkerbellTests) == 0 { conf.Logger.V(1).Info("No tinkerbell airgapped test to run") - return runConfs, nil + return nil, nil } conf.Logger.V(1).Info("INFO:", "tinkerbellAirGappedTests", len(airgappedTinkerbellTests)) testPerInstance := len(airgappedTinkerbellTests) / conf.MaxInstances @@ -468,11 +515,21 @@ func appendAirgappedTinkerbellRunConfs(awsSession *session.Session, testsList [] if err != nil { return nil, err } + + hardware, err := airgappedHardwarePool(conf.session, conf.StorageBucket) + if err != nil { + return nil, fmt.Errorf("failed to get airgapped hardware inventory for Tinkerbell Tests: %v", err) + } + catalogue := newHardwareCatalogue(hardware) + + runConfs := make([]*instanceRunConf, 0, len(airgappedTinkerbellTestsWithCount)) for i, test := range airgappedTinkerbellTestsWithCount { testsInVSphereInstance = append(testsInVSphereInstance, test.Name) ipPool := ipManager.reserveIPPool(tinkerbellIPPoolSize) if len(testsInVSphereInstance) == testPerInstance || (len(testsList)-1) == i { - runConfs = append(runConfs, newInstanceRunConf(awsSession, conf, len(runConfs), strings.Join(testsInVSphereInstance, "|"), ipPool, []*api.Hardware{}, test.Count, true, VSphereTestRunnerType, testRunnerConfig)) + c := newInstanceRunConf(conf.session, conf, len(runConfs), strings.Join(testsInVSphereInstance, "|"), ipPool, []*api.Hardware{}, test.Count, true, VSphereTestRunnerType, conf.infraConfig) + c.hardwareCatalogue = catalogue + runConfs = append(runConfs, c) testsInVSphereInstance = make([]string, 0, testPerInstance) } } @@ -504,9 +561,9 @@ func getTinkerbellTestsWithCount(tinkerbellTests []string, conf ParallelRunConf) return tinkerbellTestsWithCount, nil } -func newInstanceRunConf(awsSession *session.Session, conf ParallelRunConf, jobNumber int, testRegex string, ipPool networkutils.IPPool, hardware []*api.Hardware, hardwareCount int, tinkerbellAirgappedTest bool, testRunnerType TestRunnerType, testRunnerConfig *TestInfraConfig) instanceRunConf { +func newInstanceRunConf(awsSession *session.Session, conf ParallelRunConf, jobNumber int, testRegex string, ipPool networkutils.IPPool, hardware []*api.Hardware, hardwareCount int, tinkerbellAirgappedTest bool, testRunnerType TestRunnerType, testRunnerConfig *TestInfraConfig) *instanceRunConf { jobID := fmt.Sprintf("%s-%d", conf.JobId, jobNumber) - return instanceRunConf{ + return &instanceRunConf{ Session: awsSession, InstanceProfileName: conf.InstanceProfileName, StorageBucket: conf.StorageBucket, @@ -527,7 +584,7 @@ func newInstanceRunConf(awsSession *session.Session, conf ParallelRunConf, jobNu } } -func logTestGroups(logger logr.Logger, instancesConf []instanceRunConf) { +func logTestGroups(logger logr.Logger, instancesConf []*instanceRunConf) { testGroups := make([]string, 0, len(instancesConf)) for _, i := range instancesConf { testGroups = append(testGroups, i.Regex) @@ -535,57 +592,6 @@ func logTestGroups(logger logr.Logger, instancesConf []instanceRunConf) { logger.V(1).Info("Running tests in parallel", "testsGroups", testGroups) } -func getNonAirgappedHardwarePool(storageBucket string) ([]*api.Hardware, error) { - awsSession, err := session.NewSession() - if err != nil { - return nil, fmt.Errorf("creating aws session for tests: %v", err) - } - err = s3.DownloadToDisk(awsSession, os.Getenv(tinkerbellHardwareS3FileKeyEnvVar), storageBucket, e2eHardwareCsvFilePath) - if err != nil 
{ - return nil, fmt.Errorf("failed to download tinkerbell hardware csv: %v", err) - } - - hardware, err := api.NewHardwareSliceFromFile(e2eHardwareCsvFilePath) - if err != nil { - return nil, fmt.Errorf("failed to get Tinkerbell hardware: %v", err) - } - return hardware, nil -} - -// Airgapped tinkerbell tests have special hardware requirements that doesn't have internet connectivity. -func getAirgappedHardwarePool(storageBucket string) ([]*api.Hardware, error) { - awsSession, err := session.NewSession() - if err != nil { - return nil, fmt.Errorf("creating aws session for tests: %v", err) - } - err = s3.DownloadToDisk(awsSession, os.Getenv(tinkerbellAirgappedHardwareS3FileKeyEnvVar), storageBucket, e2eAirgappedHardwareCsvFilePath) - if err != nil { - return nil, fmt.Errorf("downloading tinkerbell airgapped hardware csv: %v", err) - } - - hardware, err := api.NewHardwareSliceFromFile(e2eAirgappedHardwareCsvFilePath) - if err != nil { - return nil, fmt.Errorf("failed to get Tinkerbell hardware: %v", err) - } - - return hardware, nil -} - -func reserveTinkerbellHardware(conf *instanceRunConf, invCatalogue *hardwareCatalogue) error { - reservedTinkerbellHardware, err := invCatalogue.reserveHardware(conf.HardwareCount) - if err != nil { - return fmt.Errorf("timed out waiting for hardware") - } - conf.Hardware = reservedTinkerbellHardware - logTinkerbellTestHardwareInfo(conf, "Reserved") - return nil -} - -func releaseTinkerbellHardware(conf *instanceRunConf, invCatalogue *hardwareCatalogue) { - logTinkerbellTestHardwareInfo(conf, "Releasing") - invCatalogue.releaseHardware(conf.Hardware) -} - func logTinkerbellTestHardwareInfo(conf *instanceRunConf, action string) { var hardwareInfo []string for _, hardware := range conf.Hardware { diff --git a/internal/test/e2e/setup.go b/internal/test/e2e/setup.go index ee66c8bcd3c8..29919bdd41f6 100644 --- a/internal/test/e2e/setup.go +++ b/internal/test/e2e/setup.go @@ -47,7 +47,7 @@ type E2ESession struct { logger logr.Logger } -func newE2ESession(instanceId string, conf instanceRunConf) (*E2ESession, error) { +func newE2ESession(instanceId string, conf *instanceRunConf) (*E2ESession, error) { e := &E2ESession{ session: conf.Session, instanceId: instanceId, diff --git a/internal/test/e2e/testRunner.go b/internal/test/e2e/test_runner.go similarity index 75% rename from internal/test/e2e/testRunner.go rename to internal/test/e2e/test_runner.go index 9f9e9421afbb..62c5fa9a3188 100644 --- a/internal/test/e2e/testRunner.go +++ b/internal/test/e2e/test_runner.go @@ -35,9 +35,9 @@ const ( ) type TestRunner interface { - createInstance(instanceConf instanceRunConf) (string, error) - tagInstance(instanceConf instanceRunConf, key, value string) error - decommInstance(instanceRunConf) error + createInstance(instanceConf *instanceRunConf) (string, error) + tagInstance(instanceConf *instanceRunConf, key, value string) error + decommInstance(*instanceRunConf) error } type TestRunnerType string @@ -49,12 +49,8 @@ const ( func newTestRunner(runnerType TestRunnerType, config TestInfraConfig) (TestRunner, error) { if runnerType == VSphereTestRunnerType { - var err error v := &config.VSphereTestRunner - v.envMap, err = v.setEnvironment() - if err != nil { - return nil, fmt.Errorf("failed to set env for vSphere test runner: %v", err) - } + v.setEnvironment() return v, nil } else { return &config.Ec2TestRunner, nil @@ -72,7 +68,10 @@ func NewTestRunnerConfigFromFile(logger logr.Logger, configFile string) (*TestIn return nil, fmt.Errorf("failed to create test runner config from 
file: %v", err) } - config := TestInfraConfig{} + config, err := ReadRunnerConfig(configFile) + if err != nil { + return nil, fmt.Errorf("failed to create test runner config from file: %v", err) + } config.VSphereTestRunner.logger = logger config.Ec2TestRunner.logger = logger @@ -81,7 +80,35 @@ func NewTestRunnerConfigFromFile(logger logr.Logger, configFile string) (*TestIn return nil, fmt.Errorf("failed to create test runner config from file: %v", err) } - return &config, nil + return config, nil +} + +// ReadRunnerConfig reads the runner config from the given file. +func ReadRunnerConfig(configFile string) (*TestInfraConfig, error) { + file, err := os.ReadFile(configFile) + if err != nil { + return nil, fmt.Errorf("reading runner config: %w", err) + } + + config := &TestInfraConfig{} + err = yaml.Unmarshal(file, config) + if err != nil { + return nil, fmt.Errorf("unmarshalling runner config: %v", err) + } + + if vSphereUsername, ok := os.LookupEnv(testRunnerVCUserEnvVar); ok && len(vSphereUsername) > 0 { + config.VSphereTestRunner.Username = vSphereUsername + } else { + return nil, fmt.Errorf("missing environment variable: %s", testRunnerVCUserEnvVar) + } + + if vSpherePassword, ok := os.LookupEnv(testRunnerVCPasswordEnvVar); ok && len(vSpherePassword) > 0 { + config.VSphereTestRunner.Password = vSpherePassword + } else { + return nil, fmt.Errorf("missing environment variable: %s", testRunnerVCPasswordEnvVar) + } + + return config, nil } type testRunner struct { @@ -99,7 +126,8 @@ type VSphereTestRunner struct { testRunner ActivationId string envMap map[string]string - Url string `yaml:"url"` + + URL string `yaml:"url"` Insecure bool `yaml:"insecure"` Library string `yaml:"library"` Template string `yaml:"template"` @@ -108,34 +136,28 @@ type VSphereTestRunner struct { ResourcePool string `yaml:"resourcePool"` Network string `yaml:"network"` Folder string `yaml:"folder"` + Username string + Password string } -func (v *VSphereTestRunner) setEnvironment() (map[string]string, error) { - envMap := make(map[string]string) - if vSphereUsername, ok := os.LookupEnv(testRunnerVCUserEnvVar); ok && len(vSphereUsername) > 0 { - envMap[govcUsernameKey] = vSphereUsername - } else { - return nil, fmt.Errorf("missing environment variable: %s", testRunnerVCUserEnvVar) - } - - if vSpherePassword, ok := os.LookupEnv(testRunnerVCPasswordEnvVar); ok && len(vSpherePassword) > 0 { - envMap[govcPasswordKey] = vSpherePassword - } else { - return nil, fmt.Errorf("missing environment variable: %s", testRunnerVCPasswordEnvVar) +func (v *VSphereTestRunner) setEnvironment() { + v.envMap = map[string]string{ + govcUsernameKey: v.Username, + govcPasswordKey: v.Password, + govcURLKey: v.URL, + govcInsecure: strconv.FormatBool(v.Insecure), + govcDatacenterKey: v.Datacenter, } - - envMap[govcURLKey] = v.Url - envMap[govcInsecure] = strconv.FormatBool(v.Insecure) - envMap[govcDatacenterKey] = v.Datacenter - - v.envMap = envMap - return envMap, nil } -func (v *VSphereTestRunner) createInstance(c instanceRunConf) (string, error) { +func (v *VSphereTestRunner) createInstance(c *instanceRunConf) (string, error) { name := getTestRunnerName(v.logger, c.JobID) - ssmActivationInfo, err := ssm.CreateActivation(c.Session, name, c.InstanceProfileName) + ssmActivationInfo, err := ssm.CreateActivation( + // It's important to add the tinkerbell job tag since that's what we use to then search + // for lingering activations and instances to clean up. 
+ c.Session, name, c.InstanceProfileName, ssm.Tag{tinkerbellJobTag, c.JobID}, + ) if err != nil { return "", fmt.Errorf("unable to create ssm activation: %v", err) } @@ -184,7 +206,7 @@ func (v *VSphereTestRunner) createInstance(c instanceRunConf) (string, error) { return *ssmInstance.InstanceId, nil } -func (e *Ec2TestRunner) createInstance(c instanceRunConf) (string, error) { +func (e *Ec2TestRunner) createInstance(c *instanceRunConf) (string, error) { name := getTestRunnerName(e.logger, c.JobID) e.logger.V(1).Info("Creating ec2 Test Runner instance", "name", name) instanceID, err := ec2.CreateInstance(c.Session, e.AmiID, key, tag, c.InstanceProfileName, e.SubnetID, name) @@ -196,10 +218,15 @@ func (e *Ec2TestRunner) createInstance(c instanceRunConf) (string, error) { return instanceID, nil } -func (v *VSphereTestRunner) tagInstance(c instanceRunConf, key, value string) error { +func (v *VSphereTestRunner) tagInstance(c *instanceRunConf, key, value string) error { vmName := getTestRunnerName(v.logger, c.JobID) vmPath := fmt.Sprintf("/%s/vm/%s/%s", v.Datacenter, v.Folder, vmName) - tag := fmt.Sprintf("%s:%s", key, value) + var tag string + if value != "" { + tag = fmt.Sprintf("%s:%s", key, value) + } else { + tag = key + } if err := vsphere.TagVirtualMachine(v.envMap, vmPath, tag); err != nil { return fmt.Errorf("failed to tag vSphere test runner: %v", err) @@ -207,7 +234,7 @@ func (v *VSphereTestRunner) tagInstance(c instanceRunConf, key, value string) er return nil } -func (e *Ec2TestRunner) tagInstance(c instanceRunConf, key, value string) error { +func (e *Ec2TestRunner) tagInstance(c *instanceRunConf, key, value string) error { err := ec2.TagInstance(c.Session, e.InstanceID, key, value) if err != nil { return fmt.Errorf("failed to tag Ec2 test runner: %v", err) @@ -215,9 +242,9 @@ func (e *Ec2TestRunner) tagInstance(c instanceRunConf, key, value string) error return nil } -func (v *VSphereTestRunner) decommInstance(c instanceRunConf) error { - _, deregisterError := ssm.DeregisterInstance(c.Session, v.InstanceID) - _, deactivateError := ssm.DeleteActivation(c.Session, v.ActivationId) +func (v *VSphereTestRunner) decommInstance(c *instanceRunConf) error { + _, deregisterError := ssm.DeregisterInstances(c.Session, v.InstanceID) + _, deactivateError := ssm.DeleteActivations(c.Session, v.ActivationId) deleteError := cleanup.VsphereRmVms(context.Background(), getTestRunnerName(v.logger, c.JobID), executables.WithGovcEnvMap(v.envMap)) if deregisterError != nil { @@ -235,7 +262,7 @@ func (v *VSphereTestRunner) decommInstance(c instanceRunConf) error { return nil } -func (e *Ec2TestRunner) decommInstance(c instanceRunConf) error { +func (e *Ec2TestRunner) decommInstance(c *instanceRunConf) error { runnerName := getTestRunnerName(e.logger, c.JobID) e.logger.V(1).Info("Terminating ec2 Test Runner instance", "instanceID", e.InstanceID, "runner", runnerName) if err := ec2.TerminateEc2Instances(c.Session, aws.StringSlice([]string{e.InstanceID})); err != nil { diff --git a/internal/test/e2e/tinkerbell.go b/internal/test/e2e/tinkerbell.go index 796cf449841b..deb6219f4486 100644 --- a/internal/test/e2e/tinkerbell.go +++ b/internal/test/e2e/tinkerbell.go @@ -1,13 +1,16 @@ package e2e import ( + "context" "fmt" "os" "regexp" + "github.com/aws/aws-sdk-go/aws/session" "github.com/go-logr/logr" "github.com/aws/eks-anywhere/internal/pkg/api" + "github.com/aws/eks-anywhere/internal/pkg/s3" "github.com/aws/eks-anywhere/internal/pkg/ssm" e2etests "github.com/aws/eks-anywhere/test/framework" ) @@ -24,6 +27,9 
@@ const ( tinkerbellDefaultMaxHardwarePerE2ETest = 4 tinkerbellBootstrapInterfaceEnvVar = "T_TINKERBELL_BOOTSTRAP_INTERFACE" tinkerbellCIEnvironmentEnvVar = "T_TINKERBELL_CI_ENVIRONMENT" + + // tinkerbellJobTag is the tag used to map vm runners and SSM activations to an e2e job. + tinkerbellJobTag = "eksa-tinkerbell-e2e-job" ) // TinkerbellTest maps each Tinkbell test with the hardware count needed for the test. @@ -125,3 +131,74 @@ func getTinkerbellAirgappedTests(tests []string) []string { } return tinkerbellTests } + +// ReadTinkerbellMachinePool returns the list of baremetal machines designated for e2e tests. +func ReadTinkerbellMachinePool(session *session.Session, bucketName string) ([]*api.Hardware, error) { + hardware := []*api.Hardware{} + machines, err := nonAirgappedHardwarePool(session, bucketName) + if err != nil { + return nil, err + } + hardware = append(hardware, machines...) + + machines, err = airgappedHardwarePool(session, bucketName) + if err != nil { + return nil, err + } + hardware = append(hardware, machines...) + + return hardware, nil +} + +func nonAirgappedHardwarePool(session *session.Session, storageBucket string) ([]*api.Hardware, error) { + err := s3.DownloadToDisk(session, os.Getenv(tinkerbellHardwareS3FileKeyEnvVar), storageBucket, e2eHardwareCsvFilePath) + if err != nil { + return nil, fmt.Errorf("failed to download tinkerbell hardware csv: %v", err) + } + + hardware, err := api.ReadTinkerbellHardwareFromFile(e2eHardwareCsvFilePath) + if err != nil { + return nil, fmt.Errorf("failed to get Tinkerbell hardware: %v", err) + } + return hardware, nil +} + +// airgappedHardwarePool returns the hardware pool for airgapped tinkerbell tests. +// Airgapped tinkerbell tests have special hardware requirements that doesn't have internet connectivity. +func airgappedHardwarePool(session *session.Session, storageBucket string) ([]*api.Hardware, error) { + err := s3.DownloadToDisk(session, os.Getenv(tinkerbellAirgappedHardwareS3FileKeyEnvVar), storageBucket, e2eAirgappedHardwareCsvFilePath) + if err != nil { + return nil, fmt.Errorf("downloading tinkerbell airgapped hardware csv: %v", err) + } + + hardware, err := api.ReadTinkerbellHardwareFromFile(e2eAirgappedHardwareCsvFilePath) + if err != nil { + return nil, fmt.Errorf("failed to get Tinkerbell hardware: %v", err) + } + + return hardware, nil +} + +type TinkerbellSSMInstances struct { + // InstanceIDs is a list of SSM instance IDs created for the vm runners. + InstanceIDs []string + // ActivationIDs is a list of SSM activation IDs created for the vm runners. + ActivationIDs []string +} + +// ListTinkerbellSSMInstances returns a list of SSM instances created for the tinkerbell vm runners. +func ListTinkerbellSSMInstances(ctx context.Context, session *session.Session) (*TinkerbellSSMInstances, error) { + runnerInstances := &TinkerbellSSMInstances{} + + instances, err := ssm.ListInstancesByTags(ctx, session, ssm.Tag{Key: tinkerbellJobTag, Value: "*"}) + if err != nil { + return nil, fmt.Errorf("listing tinkerbell runners: %v", err) + } + + for _, instance := range instances { + runnerInstances.ActivationIDs = append(runnerInstances.ActivationIDs, *instance.ActivationId) + runnerInstances.InstanceIDs = append(runnerInstances.InstanceIDs, *instance.InstanceId) + } + + return runnerInstances, nil +} diff --git a/pkg/clients/vsphere/vm.go b/pkg/clients/vsphere/vm.go new file mode 100644 index 000000000000..ef17d7fdcd92 --- /dev/null +++ b/pkg/clients/vsphere/vm.go @@ -0,0 +1,6 @@ +package vsphere + +// VM is a vSphere VM. 
+type VM struct {
+	Path string `json:"path"`
+}
diff --git a/pkg/executables/govc.go b/pkg/executables/govc.go
index 6f3c653ae224..628ced5e02e5 100644
--- a/pkg/executables/govc.go
+++ b/pkg/executables/govc.go
@@ -19,6 +19,7 @@ import (
 	"sigs.k8s.io/yaml"
 
 	"github.com/aws/eks-anywhere/pkg/api/v1alpha1"
+	"github.com/aws/eks-anywhere/pkg/clients/vsphere"
 	"github.com/aws/eks-anywhere/pkg/config"
 	"github.com/aws/eks-anywhere/pkg/filewriter"
 	"github.com/aws/eks-anywhere/pkg/logger"
@@ -98,6 +99,26 @@ func WithGovcEnvMap(envMap map[string]string) GovcOpt {
 	}
 }
 
+type GovcConfig struct {
+	Username   string
+	Password   string
+	URL        string
+	Insecure   bool
+	Datacenter string
+}
+
+// Configure sets up the govc executable with the provided configuration.
+// This is not thread safe.
+func (g *Govc) Configure(config GovcConfig) {
+	g.envMap = map[string]string{
+		govcUsernameKey:   config.Username,
+		govcPasswordKey:   config.Password,
+		govcURLKey:        config.URL,
+		govcInsecure:      strconv.FormatBool(config.Insecure),
+		govcDatacenterKey: config.Datacenter,
+	}
+}
+
 func (g *Govc) exec(ctx context.Context, args ...string) (stdout bytes.Buffer, err error) {
 	envMap, err := g.validateAndSetupCreds()
 	if err != nil {
@@ -503,7 +524,7 @@ func (g *Govc) DeleteTemplate(ctx context.Context, resourcePool, templatePath st
 	if err := g.removeSnapshotsFromVM(ctx, templatePath); err != nil {
 		return err
 	}
-	if err := g.deleteVM(ctx, templatePath); err != nil {
+	if err := g.DeleteVM(ctx, templatePath); err != nil {
 		return err
 	}
 
@@ -524,7 +545,7 @@ func (g *Govc) removeSnapshotsFromVM(ctx context.Context, path string) error {
 	return nil
 }
 
-func (g *Govc) deleteVM(ctx context.Context, path string) error {
+func (g *Govc) DeleteVM(ctx context.Context, path string) error {
 	if _, err := g.exec(ctx, "vm.destroy", path); err != nil {
 		return fmt.Errorf("deleting vm: %v", err)
 	}
@@ -1218,3 +1239,27 @@ func getValueFromString(str string) (int, error) {
 	}
 	return numValue, nil
 }
+
+type vmsResponse struct {
+	Elements []vsphere.VM `json:"elements"`
+}
+
+// ListVMs returns the list of VMs in the provided folder.
+func (g *Govc) ListVMs(ctx context.Context, folder string) ([]vsphere.VM, error) {
+	vmsOutput, err := g.exec(ctx, "ls", "-t", "VirtualMachine", "-json", folder)
+	if err != nil {
+		return nil, fmt.Errorf("govc returned error when listing vms: %w", err)
+	}
+
+	vmsJson := vmsOutput.String()
+	if vmsJson == "null" || vmsJson == "" {
+		return nil, nil
+	}
+
+	vms := &vmsResponse{}
+	if err = json.Unmarshal([]byte(vmsJson), vms); err != nil {
+		return nil, fmt.Errorf("failed unmarshalling govc response from list vms: %w", err)
+	}
+
+	return vms.Elements, nil
+}
diff --git a/pkg/executables/govc_test.go b/pkg/executables/govc_test.go
index f3c03109167e..2d3bb9b090ad 100644
--- a/pkg/executables/govc_test.go
+++ b/pkg/executables/govc_test.go
@@ -20,6 +20,7 @@ import (
 
 	"github.com/aws/eks-anywhere/internal/test"
 	"github.com/aws/eks-anywhere/pkg/api/v1alpha1"
+	"github.com/aws/eks-anywhere/pkg/clients/vsphere"
 	"github.com/aws/eks-anywhere/pkg/executables"
 	mockexecutables "github.com/aws/eks-anywhere/pkg/executables/mocks"
 	"github.com/aws/eks-anywhere/pkg/retrier"
@@ -1722,3 +1723,60 @@ func TestGovcGetResourcePoolInfo(t *testing.T) {
 		})
 	}
 }
+
+func TestListVMs(t *testing.T) {
+	testCases := []struct {
+		name            string
+		folder          string
+		vsphereResponse string
+		want            []vsphere.VM
+	}{
+		{
+			name:            "null response",
+			folder:          "my-vms",
+			vsphereResponse: `null`,
+			want:            nil,
+		},
+		{
+			name:            "empty response",
+			folder:          "my-vms",
+			vsphereResponse: "",
+			want:            nil,
+		},
+		{
+			name:   "some vms",
+			folder: "my-vms",
+			vsphereResponse: `{
+	"elements": [
+		{
+			"name": "vm1",
+			"path": "/SDDC-Datacenter/vm/my-vms/vm1"
+		},
+		{
+			"name": "vm2",
+			"path": "/SDDC-Datacenter/vm/my-vms/vm2"
+		}
+	]
+}`,
+			want: []vsphere.VM{
+				{
+					Path: "/SDDC-Datacenter/vm/my-vms/vm1",
+				},
+				{
+					Path: "/SDDC-Datacenter/vm/my-vms/vm2",
+				},
+			},
+		},
+	}
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			ctx := context.Background()
+			g := NewWithT(t)
+
+			_, govc, executable, env := setup(t)
+			executable.EXPECT().ExecuteWithEnv(ctx, env, "ls", "-t", "VirtualMachine", "-json", tc.folder).Return(*bytes.NewBufferString(tc.vsphereResponse), nil)
+
+			g.Expect(govc.ListVMs(ctx, tc.folder)).To(BeComparableTo(tc.want))
+		})
+	}
+}
diff --git a/test/framework/tinkerbell.go b/test/framework/tinkerbell.go
index 1362ac3688e5..7ef7d8dc61c4 100644
--- a/test/framework/tinkerbell.go
+++ b/test/framework/tinkerbell.go
@@ -146,7 +146,7 @@ func (t *Tinkerbell) WithProviderUpgrade(fillers ...api.TinkerbellFiller) Cluste
 
 // CleanupResources runs a clean up the Tinkerbell machines which simply powers them down.
 func (t *Tinkerbell) CleanupResources(_ string) error {
-	return cleanup.TinkerbellTestResources(t.inventoryCsvFilePath, true)
+	return cleanup.PowerOffTinkerbellMachinesFromFile(t.inventoryCsvFilePath, true)
 }
 
 // WithKubeVersionAndOS returns a cluster config filler that sets the cluster kube version and the right image for all