diff --git a/cmd/integration-test/pkg/tests/infra.go b/cmd/integration-test/pkg/tests/infra.go
new file mode 100644
index 00000000..a906ca70
--- /dev/null
+++ b/cmd/integration-test/pkg/tests/infra.go
@@ -0,0 +1,153 @@
+// Copyright (c) 2024 Sidero Labs, Inc.
+//
+// Use of this software is governed by the Business Source License
+// included in the LICENSE file.
+
+package tests
+
+import (
+	"context"
+	"fmt"
+	"slices"
+	"testing"
+	"time"
+
+	"github.com/cosi-project/runtime/pkg/resource"
+	"github.com/cosi-project/runtime/pkg/resource/rtestutils"
+	"github.com/cosi-project/runtime/pkg/safe"
+	"github.com/cosi-project/runtime/pkg/state"
+	"github.com/siderolabs/go-retry/retry"
+	"github.com/stretchr/testify/require"
+
+	"github.com/siderolabs/omni/client/pkg/client"
+	"github.com/siderolabs/omni/client/pkg/omni/resources"
+	"github.com/siderolabs/omni/client/pkg/omni/resources/infra"
+	"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
+	"github.com/siderolabs/omni/client/pkg/omni/resources/siderolink"
+)
+
+// AssertMachinesShouldBeProvisioned creates a machine request set and waits until all requests are fulfilled.
+//
+//nolint:gocognit
+func AssertMachinesShouldBeProvisioned(testCtx context.Context, client *client.Client, machineCount int, machineRequestSetName,
+	talosVersion, infraProvider string,
+) TestFunc {
+	return func(t *testing.T) {
+		ctx, cancel := context.WithTimeout(testCtx, time.Second*60)
+		defer cancel()
+
+		machineRequestSet, err := safe.ReaderGetByID[*omni.MachineRequestSet](ctx, client.Omni().State(), machineRequestSetName)
+
+		if !state.IsNotFoundError(err) {
+			require.NoError(t, err)
+		}
+
+		if machineRequestSet != nil {
+			rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName})
+		}
+
+		machineRequestSet = omni.NewMachineRequestSet(resources.DefaultNamespace, machineRequestSetName)
+
+		machineRequestSet.TypedSpec().Value.Extensions = []string{
+			"siderolabs/" + HelloWorldServiceExtensionName,
+		}
+
+		machineRequestSet.TypedSpec().Value.ProviderId = infraProvider
+		machineRequestSet.TypedSpec().Value.TalosVersion = talosVersion
+		machineRequestSet.TypedSpec().Value.MachineCount = int32(machineCount)
+
+		require.NoError(t, client.Omni().State().Create(ctx, machineRequestSet))
+
+		var resources safe.List[*infra.MachineRequestStatus]
+
+		err = retry.Constant(time.Second*60).RetryWithContext(ctx, func(ctx context.Context) error {
+			resources, err = safe.ReaderListAll[*infra.MachineRequestStatus](ctx, client.Omni().State(),
+				state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)),
+			)
+			if err != nil {
+				return err
+			}
+
+			if resources.Len() != machineCount {
+				return retry.ExpectedErrorf("provision machine count is %d, expected %d", resources.Len(), machineCount)
+			}
+
+			return nil
+		})
+
+		require.NoError(t, err)
+
+		err = retry.Constant(time.Second*60).RetryWithContext(ctx, func(ctx context.Context) error {
+			var machines safe.List[*omni.MachineStatus]
+
+			machines, err = safe.ReaderListAll[*omni.MachineStatus](ctx, client.Omni().State())
+			if err != nil {
+				return err
+			}
+
+			if machines.Len() < machineCount {
+				return retry.ExpectedErrorf("links count is %d, expected at least %d", machines.Len(), machineCount)
+			}
+
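+			// Every machine request should be matched by exactly one machine link:
+			// zero links means the request is not fulfilled yet (retryable), while
+			// more than one is an invariant violation that fails the test outright.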
the related link", r.Metadata().ID()) + } + + if requestedMachines.Len() != 1 { + return fmt.Errorf("more than one machine is labeled with %q machine request label", r.Metadata().ID()) + } + + m := requestedMachines.Get(0) + if m.TypedSpec().Value.Hardware == nil { + return retry.ExpectedErrorf("the machine %q is not fully provisioned", r.Metadata().ID()) + } + } + + return nil + }) + + require.NoError(t, err) + } +} + +// AssertMachinesShouldBeDeprovisioned removes the machine request set and checks that all related links were deleted. +func AssertMachinesShouldBeDeprovisioned(testCtx context.Context, client *client.Client, machineRequestSetName string) TestFunc { + return func(t *testing.T) { + ctx, cancel := context.WithTimeout(testCtx, time.Second*60) + defer cancel() + + requestIDs := rtestutils.ResourceIDs[*infra.MachineRequest](ctx, t, client.Omni().State(), + state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)), + ) + + links, err := safe.ReaderListAll[*siderolink.Link](ctx, client.Omni().State()) + + require.NoError(t, err) + + linkIDs := make([]string, 0, len(requestIDs)) + + for l := range links.All() { + mr, ok := l.Metadata().Labels().Get(omni.LabelMachineRequest) + if !ok { + continue + } + + if slices.Index(requestIDs, mr) != -1 { + linkIDs = append(linkIDs, l.Metadata().ID()) + } + } + + rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName}) + + for _, id := range requestIDs { + rtestutils.AssertNoResource[*infra.MachineRequest](ctx, t, client.Omni().State(), id) + } + + for _, id := range linkIDs { + rtestutils.AssertNoResource[*siderolink.Link](ctx, t, client.Omni().State(), id) + } + } +} diff --git a/cmd/integration-test/pkg/tests/tests.go b/cmd/integration-test/pkg/tests/tests.go index 6fbc9329..68f0d632 100644 --- a/cmd/integration-test/pkg/tests/tests.go +++ b/cmd/integration-test/pkg/tests/tests.go @@ -8,7 +8,6 @@ package tests import ( "context" - "errors" "fmt" "log" "net/http" @@ -54,9 +53,10 @@ type TalosAPIKeyPrepareFunc func(ctx context.Context, contextName string) error type Options struct { RunTestPattern string - CleanupLinks bool - RunStatsCheck bool - ExpectedMachines int + CleanupLinks bool + RunStatsCheck bool + ExpectedMachines int + ProvisionMachines int RestartAMachineFunc RestartAMachineFunc WipeAMachineFunc WipeAMachineFunc @@ -68,6 +68,7 @@ type Options struct { AnotherTalosVersion string AnotherKubernetesVersion string OmnictlPath string + InfraProvider string } // Run the integration tests. 
+		for l := range links.All() {
+			mr, ok := l.Metadata().Labels().Get(omni.LabelMachineRequest)
+			if !ok {
+				continue
+			}
+
+			if slices.Contains(requestIDs, mr) {
+				linkIDs = append(linkIDs, l.Metadata().ID())
+			}
+		}
+
+		rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName})
+
+		for _, id := range requestIDs {
+			rtestutils.AssertNoResource[*infra.MachineRequest](ctx, t, client.Omni().State(), id)
+		}
+
+		for _, id := range linkIDs {
+			rtestutils.AssertNoResource[*siderolink.Link](ctx, t, client.Omni().State(), id)
+		}
+	}
+}
diff --git a/cmd/integration-test/pkg/tests/tests.go b/cmd/integration-test/pkg/tests/tests.go
index 6fbc9329..68f0d632 100644
--- a/cmd/integration-test/pkg/tests/tests.go
+++ b/cmd/integration-test/pkg/tests/tests.go
@@ -8,7 +8,6 @@ package tests
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"log"
 	"net/http"
@@ -54,9 +53,10 @@ type TalosAPIKeyPrepareFunc func(ctx context.Context, contextName string) error
 type Options struct {
 	RunTestPattern string
 
-	CleanupLinks     bool
-	RunStatsCheck    bool
-	ExpectedMachines int
+	CleanupLinks      bool
+	RunStatsCheck     bool
+	ExpectedMachines  int
+	ProvisionMachines int
 
 	RestartAMachineFunc RestartAMachineFunc
 	WipeAMachineFunc    WipeAMachineFunc
@@ -68,6 +68,7 @@ type Options struct {
 	AnotherTalosVersion      string
 	AnotherKubernetesVersion string
 	OmnictlPath              string
+	InfraProvider            string
 }
 
 // Run the integration tests.
@@ -1144,8 +1145,6 @@ Test flow of cluster creation and scaling using cluster templates.`,
 	var re *regexp.Regexp
 
 	if options.RunTestPattern != "" {
-		var err error
-
 		if re, err = regexp.Compile(options.RunTestPattern); err != nil {
 			log.Printf("run test pattern parse error: %s", err)
@@ -1180,33 +1179,47 @@
 		}
 	}
 
+	preRunTests := []testing.InternalTest{}
+
+	if options.ProvisionMachines != 0 {
+		preRunTests = append(preRunTests, testing.InternalTest{
+			Name: "AssertMachinesShouldBeProvisioned",
+			F:    AssertMachinesShouldBeProvisioned(ctx, rootClient, options.ProvisionMachines, "main", options.MachineOptions.TalosVersion, options.InfraProvider),
+		})
+	}
+
+	if len(preRunTests) > 0 {
+		if err = runTests(preRunTests); err != nil {
+			return err
+		}
+	}
+
 	machineSemaphore := semaphore.NewWeighted(int64(options.ExpectedMachines))
 
-	exitCode := testing.MainStart(
-		matchStringOnly(func(string, string) (bool, error) { return true, nil }),
-		makeTests(ctx, testsToRun, machineSemaphore),
-		nil,
-		nil,
-		nil,
-	).Run()
+	if err = runTests(makeTests(ctx, testsToRun, machineSemaphore)); err != nil {
+		return err
+	}
+
+	postRunTests := []testing.InternalTest{}
 
-	extraTests := []testing.InternalTest{}
+	if options.ProvisionMachines != 0 {
+		postRunTests = append(postRunTests, testing.InternalTest{
+			Name: "AssertMachinesShouldBeDeprovisioned",
+			F:    AssertMachinesShouldBeDeprovisioned(ctx, rootClient, "main"),
+		})
+	}
 
 	if options.RunStatsCheck {
-		extraTests = append(extraTests, testing.InternalTest{
+		postRunTests = append(postRunTests, testing.InternalTest{
 			Name: "AssertStatsLimits",
 			F:    AssertStatsLimits(ctx),
 		})
 	}
 
-	if len(extraTests) > 0 && exitCode == 0 {
-		exitCode = testing.MainStart(
-			matchStringOnly(func(string, string) (bool, error) { return true, nil }),
-			extraTests,
-			nil,
-			nil,
-			nil,
-		).Run()
+	if len(postRunTests) > 0 {
+		if err = runTests(postRunTests); err != nil {
+			return err
+		}
 	}
 
 	if options.CleanupLinks {
@@ -1215,8 +1228,20 @@
 		}
 	}
 
+	return nil
+}
+
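+// runTests executes the given tests via testing.MainStart and converts a
+// non-zero exit code into an error.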
+func runTests(testsToRun []testing.InternalTest) error {
+	exitCode := testing.MainStart(
+		matchStringOnly(func(string, string) (bool, error) { return true, nil }),
+		testsToRun,
+		nil,
+		nil,
+		nil,
+	).Run()
+
 	if exitCode != 0 {
-		return errors.New("test failed")
+		return fmt.Errorf("test failed")
 	}
 
 	return nil
diff --git a/cmd/integration-test/root.go b/cmd/integration-test/root.go
index 80bfb9a1..d3639749 100644
--- a/cmd/integration-test/root.go
+++ b/cmd/integration-test/root.go
@@ -42,14 +42,16 @@ var rootCmd = &cobra.Command{
 			testOptions := tests.Options{
 				RunTestPattern: rootCmdFlags.runTestPattern,
 
-				ExpectedMachines: rootCmdFlags.expectedMachines,
-				CleanupLinks:     rootCmdFlags.cleanupLinks,
-				RunStatsCheck:    rootCmdFlags.runStatsCheck,
+				ExpectedMachines:  rootCmdFlags.expectedMachines,
+				CleanupLinks:      rootCmdFlags.cleanupLinks,
+				RunStatsCheck:     rootCmdFlags.runStatsCheck,
+				ProvisionMachines: rootCmdFlags.provisionMachinesCount,
 
 				MachineOptions:           rootCmdFlags.machineOptions,
 				AnotherTalosVersion:      rootCmdFlags.anotherTalosVersion,
 				AnotherKubernetesVersion: rootCmdFlags.anotherKubernetesVersion,
 				OmnictlPath:              rootCmdFlags.omnictlPath,
+				InfraProvider:            rootCmdFlags.infraProvider,
 			}
 
 			if rootCmdFlags.restartAMachineScript != "" {
@@ -116,11 +118,13 @@ func execCmd(ctx context.Context, parsedScript []string, args ...string) error {
 var rootCmdFlags struct {
 	endpoint       string
 	runTestPattern string
+	infraProvider  string
 
-	expectedMachines int
-	parallel         int64
-	cleanupLinks     bool
-	runStatsCheck    bool
+	provisionMachinesCount int
+	expectedMachines       int
+	parallel               int64
+	cleanupLinks           bool
+	runStatsCheck          bool
 
 	testsTimeout time.Duration
 
@@ -158,6 +162,8 @@ func init() {
 	rootCmd.Flags().DurationVarP(&rootCmdFlags.testsTimeout, "timeout", "t", time.Hour, "tests global timeout")
 	rootCmd.Flags().BoolVar(&rootCmdFlags.cleanupLinks, "cleanup-links", false, "remove all links after the tests are complete")
 	rootCmd.Flags().BoolVar(&rootCmdFlags.runStatsCheck, "run-stats-check", false, "runs stats check after the test is complete")
+	rootCmd.Flags().IntVar(&rootCmdFlags.provisionMachinesCount, "provision-machines", 0, "provisions machines through the infrastructure provider")
+	rootCmd.Flags().StringVar(&rootCmdFlags.infraProvider, "infra-provider", "talemu", "use infra provider with the specified ID when provisioning the machines")
 }
 
 // withContext wraps with CLI context.
diff --git a/hack/test/integration.sh b/hack/test/integration.sh
index 13c3425f..72f4015e 100755
--- a/hack/test/integration.sh
+++ b/hack/test/integration.sh
@@ -129,9 +129,7 @@ KERNEL_ARGS="siderolink.api=grpc://$LOCAL_IP:8090?jointoken=${JOIN_TOKEN} talos.
 if [[ "${RUN_TALEMU_TESTS:-false}" == "true" ]]; then
   PROMETHEUS_CONTAINER=$(docker run --network host -p "9090:9090" -v "$(pwd)/hack/compose/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml" -it --rm -d prom/prometheus)
 
-  TALEMU_CONTAINER=$(docker run --network host --cap-add=NET_ADMIN -it --rm -d ghcr.io/siderolabs/talemu:latest --kernel-args="${KERNEL_ARGS}" --machines=30)
-
-  sleep 10
+  TALEMU_CONTAINER=$(docker run --network host --cap-add=NET_ADMIN -it --rm -d ghcr.io/siderolabs/talemu-infra-provider:latest --create-service-account --omni-api-endpoint=https://$LOCAL_IP:8099)
 
   SSL_CERT_DIR=hack/certs:/etc/ssl/certs \
   ${ARTIFACTS}/integration-test-linux-amd64 \
@@ -139,7 +137,7 @@ if [[ "${RUN_TALEMU_TESTS:-false}" == "true" ]]; then
     --talos-version=${TALOS_VERSION} \
     --omnictl-path=${ARTIFACTS}/omnictl-linux-amd64 \
     --expected-machines=30 \
-    --cleanup-links \
+    --provision-machines=30 \
     --run-stats-check \
     -t 4m \
     -p 10 \
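
Since the machines are now provisioned through the infra provider, the AssertMachinesShouldBeDeprovisioned post-run step removes the machine request set and verifies that the related links disappear, so the separate --cleanup-links pass is no longer needed. As a usage sketch (the provider ID "my-provider" and the machine counts are illustrative; connection flags are omitted), running the suite against another registered provider looks like:

  ${ARTIFACTS}/integration-test-linux-amd64 \
    --expected-machines=10 \
    --provision-machines=10 \
    --infra-provider=my-provider \
    --run-stats-check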