Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: run infra integration tests against Talemu provider #617

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 153 additions & 0 deletions cmd/integration-test/pkg/tests/infra.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// Copyright (c) 2024 Sidero Labs, Inc.
//
// Use of this software is governed by the Business Source License
// included in the LICENSE file.

package tests

import (
"context"
"fmt"
"slices"
"testing"
"time"

"github.com/cosi-project/runtime/pkg/resource"
"github.com/cosi-project/runtime/pkg/resource/rtestutils"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/siderolabs/go-retry/retry"
"github.com/stretchr/testify/require"

"github.com/siderolabs/omni/client/pkg/client"
"github.com/siderolabs/omni/client/pkg/omni/resources"
"github.com/siderolabs/omni/client/pkg/omni/resources/infra"
"github.com/siderolabs/omni/client/pkg/omni/resources/omni"
"github.com/siderolabs/omni/client/pkg/omni/resources/siderolink"
)

// AssertMachinesShouldBeProvisioned creates a machine request set and waits until all requests are fulfilled.
//
// The returned test performs the following steps, all bounded by a 60 second timeout derived from testCtx:
//   - destroys any existing machine request set named machineRequestSetName;
//   - creates a new machine request set for infraProvider with the given talosVersion and machineCount;
//   - waits until the number of MachineRequestStatus resources labeled with the set equals machineCount;
//   - waits until each of those requests has exactly one MachineStatus labeled with it and that
//     MachineStatus has its hardware information populated.
//
//nolint:gocognit
func AssertMachinesShouldBeProvisioned(testCtx context.Context, client *client.Client, machineCount int, machineRequestSetName,
	talosVersion, infraProvider string,
) TestFunc {
	return func(t *testing.T) {
		ctx, cancel := context.WithTimeout(testCtx, time.Second*60)
		defer cancel()

		machineRequestSet, err := safe.ReaderGetByID[*omni.MachineRequestSet](ctx, client.Omni().State(), machineRequestSetName)

		// A missing set is the expected starting state; any other error fails the test.
		if !state.IsNotFoundError(err) {
			require.NoError(t, err)
		}

		// Remove a leftover set from a previous run so the create below starts from scratch.
		if machineRequestSet != nil {
			rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, client.Omni().State(), []string{machineRequestSetName})
		}

		machineRequestSet = omni.NewMachineRequestSet(resources.DefaultNamespace, machineRequestSetName)

		machineRequestSet.TypedSpec().Value.Extensions = []string{
			"siderolabs/" + HelloWorldServiceExtensionName,
		}

		machineRequestSet.TypedSpec().Value.ProviderId = infraProvider
		machineRequestSet.TypedSpec().Value.TalosVersion = talosVersion
		machineRequestSet.TypedSpec().Value.MachineCount = int32(machineCount)

		require.NoError(t, client.Omni().State().Create(ctx, machineRequestSet))

		// requestStatuses is filled by the first retry loop and read by the second one.
		var requestStatuses safe.List[*infra.MachineRequestStatus]

		err = retry.Constant(time.Second*60).RetryWithContext(ctx, func(ctx context.Context) error {
			statuses, listErr := safe.ReaderListAll[*infra.MachineRequestStatus](ctx, client.Omni().State(),
				state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)),
			)
			if listErr != nil {
				return listErr
			}

			requestStatuses = statuses

			if requestStatuses.Len() != machineCount {
				return retry.ExpectedErrorf("provision machine count is %d, expected %d", requestStatuses.Len(), machineCount)
			}

			return nil
		})

		require.NoError(t, err)

		err = retry.Constant(time.Second*60).RetryWithContext(ctx, func(ctx context.Context) error {
			machines, listErr := safe.ReaderListAll[*omni.MachineStatus](ctx, client.Omni().State())
			if listErr != nil {
				return listErr
			}

			if machines.Len() < machineCount {
				// Report the count that was actually compared (machines), not the request status count.
				return retry.ExpectedErrorf("links count is %d, expected at least %d", machines.Len(), machineCount)
			}

			for r := range requestStatuses.All() {
				requestedMachines := machines.FilterLabelQuery(resource.LabelEqual(omni.LabelMachineRequest, r.Metadata().ID()))

				if requestedMachines.Len() == 0 {
					return retry.ExpectedErrorf("machine request %q doesn't have the related link", r.Metadata().ID())
				}

				// Returned as a plain error rather than a retry.ExpectedErrorf, unlike the other checks in this loop.
				if requestedMachines.Len() != 1 {
					return fmt.Errorf("more than one machine is labeled with %q machine request label", r.Metadata().ID())
				}

				m := requestedMachines.Get(0)
				if m.TypedSpec().Value.Hardware == nil {
					return retry.ExpectedErrorf("the machine %q is not fully provisioned", r.Metadata().ID())
				}
			}

			return nil
		})

		require.NoError(t, err)
	}
}

// AssertMachinesShouldBeDeprovisioned destroys the named machine request set and verifies that every
// machine request belonging to it, and every siderolink link labeled with one of those requests, is gone.
//
// The request and link IDs are collected before the set is destroyed so that their removal can be asserted afterwards.
// The whole check is bounded by a 60 second timeout derived from testCtx.
func AssertMachinesShouldBeDeprovisioned(testCtx context.Context, client *client.Client, machineRequestSetName string) TestFunc {
	return func(t *testing.T) {
		ctx, cancel := context.WithTimeout(testCtx, time.Second*60)
		defer cancel()

		st := client.Omni().State()

		requestIDs := rtestutils.ResourceIDs[*infra.MachineRequest](
			ctx, t, st,
			state.WithLabelQuery(resource.LabelEqual(omni.LabelMachineRequestSet, machineRequestSetName)),
		)

		links, err := safe.ReaderListAll[*siderolink.Link](ctx, st)
		require.NoError(t, err)

		// Keep only the links whose machine request label points at one of the set's requests.
		linkIDs := make([]string, 0, len(requestIDs))

		for link := range links.All() {
			requestID, labeled := link.Metadata().Labels().Get(omni.LabelMachineRequest)
			if labeled && slices.Contains(requestIDs, requestID) {
				linkIDs = append(linkIDs, link.Metadata().ID())
			}
		}

		rtestutils.Destroy[*omni.MachineRequestSet](ctx, t, st, []string{machineRequestSetName})

		for _, requestID := range requestIDs {
			rtestutils.AssertNoResource[*infra.MachineRequest](ctx, t, st, requestID)
		}

		for _, linkID := range linkIDs {
			rtestutils.AssertNoResource[*siderolink.Link](ctx, t, st, linkID)
		}
	}
}
73 changes: 49 additions & 24 deletions cmd/integration-test/pkg/tests/tests.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ package tests

import (
"context"
"errors"
"fmt"
"log"
"net/http"
Expand Down Expand Up @@ -54,9 +53,10 @@ type TalosAPIKeyPrepareFunc func(ctx context.Context, contextName string) error
type Options struct {
RunTestPattern string

CleanupLinks bool
RunStatsCheck bool
ExpectedMachines int
CleanupLinks bool
RunStatsCheck bool
ExpectedMachines int
ProvisionMachines int

RestartAMachineFunc RestartAMachineFunc
WipeAMachineFunc WipeAMachineFunc
Expand All @@ -68,6 +68,7 @@ type Options struct {
AnotherTalosVersion string
AnotherKubernetesVersion string
OmnictlPath string
InfraProvider string
}

// Run the integration tests.
Expand Down Expand Up @@ -1144,8 +1145,6 @@ Test flow of cluster creation and scaling using cluster templates.`,
var re *regexp.Regexp

if options.RunTestPattern != "" {
var err error

if re, err = regexp.Compile(options.RunTestPattern); err != nil {
log.Printf("run test pattern parse error: %s", err)

Expand Down Expand Up @@ -1180,33 +1179,47 @@ Test flow of cluster creation and scaling using cluster templates.`,
}
}

preRunTests := []testing.InternalTest{}

if options.ProvisionMachines != 0 {
preRunTests = append(preRunTests, testing.InternalTest{
Name: "AssertMachinesShouldBeProvisioned",
F: AssertMachinesShouldBeProvisioned(ctx, rootClient, options.ProvisionMachines, "main", options.MachineOptions.TalosVersion, options.InfraProvider),
})
}

if len(preRunTests) > 0 {
if err = runTests(preRunTests); err != nil {
return err
}
}

machineSemaphore := semaphore.NewWeighted(int64(options.ExpectedMachines))

exitCode := testing.MainStart(
matchStringOnly(func(string, string) (bool, error) { return true, nil }),
makeTests(ctx, testsToRun, machineSemaphore),
nil,
nil,
nil,
).Run()
if err = runTests(makeTests(ctx, testsToRun, machineSemaphore)); err != nil {
return err
}

postRunTests := []testing.InternalTest{}

extraTests := []testing.InternalTest{}
if options.ProvisionMachines != 0 {
postRunTests = append(postRunTests, testing.InternalTest{
Name: "AssertMachinesShouldBeDeprovisioned",
F: AssertMachinesShouldBeDeprovisioned(ctx, rootClient, "main"),
})
}

if options.RunStatsCheck {
extraTests = append(extraTests, testing.InternalTest{
postRunTests = append(postRunTests, testing.InternalTest{
Name: "AssertStatsLimits",
F: AssertStatsLimits(ctx),
})
}

if len(extraTests) > 0 && exitCode == 0 {
exitCode = testing.MainStart(
matchStringOnly(func(string, string) (bool, error) { return true, nil }),
extraTests,
nil,
nil,
nil,
).Run()
if len(postRunTests) > 0 {
if err = runTests(postRunTests); err != nil {
return err
}
}

if options.CleanupLinks {
Expand All @@ -1215,8 +1228,20 @@ Test flow of cluster creation and scaling using cluster templates.`,
}
}

return nil
}

func runTests(testsToRun []testing.InternalTest) error {
exitCode := testing.MainStart(
matchStringOnly(func(string, string) (bool, error) { return true, nil }),
testsToRun,
nil,
nil,
nil,
).Run()

if exitCode != 0 {
return errors.New("test failed")
return fmt.Errorf("test failed")
}

return nil
Expand Down
20 changes: 13 additions & 7 deletions cmd/integration-test/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,16 @@ var rootCmd = &cobra.Command{
testOptions := tests.Options{
RunTestPattern: rootCmdFlags.runTestPattern,

ExpectedMachines: rootCmdFlags.expectedMachines,
CleanupLinks: rootCmdFlags.cleanupLinks,
RunStatsCheck: rootCmdFlags.runStatsCheck,
ExpectedMachines: rootCmdFlags.expectedMachines,
CleanupLinks: rootCmdFlags.cleanupLinks,
RunStatsCheck: rootCmdFlags.runStatsCheck,
ProvisionMachines: rootCmdFlags.provisionMachinesCount,

MachineOptions: rootCmdFlags.machineOptions,
AnotherTalosVersion: rootCmdFlags.anotherTalosVersion,
AnotherKubernetesVersion: rootCmdFlags.anotherKubernetesVersion,
OmnictlPath: rootCmdFlags.omnictlPath,
InfraProvider: rootCmdFlags.infraProvider,
}

if rootCmdFlags.restartAMachineScript != "" {
Expand Down Expand Up @@ -116,11 +118,13 @@ func execCmd(ctx context.Context, parsedScript []string, args ...string) error {
var rootCmdFlags struct {
endpoint string
runTestPattern string
infraProvider string

expectedMachines int
parallel int64
cleanupLinks bool
runStatsCheck bool
provisionMachinesCount int
expectedMachines int
parallel int64
cleanupLinks bool
runStatsCheck bool

testsTimeout time.Duration

Expand Down Expand Up @@ -158,6 +162,8 @@ func init() {
rootCmd.Flags().DurationVarP(&rootCmdFlags.testsTimeout, "timeout", "t", time.Hour, "tests global timeout")
rootCmd.Flags().BoolVar(&rootCmdFlags.cleanupLinks, "cleanup-links", false, "remove all links after the tests are complete")
rootCmd.Flags().BoolVar(&rootCmdFlags.runStatsCheck, "run-stats-check", false, "runs stats check after the test is complete")
rootCmd.Flags().IntVar(&rootCmdFlags.provisionMachinesCount, "provision-machines", 0, "provisions machines through the infrastructure provider")
rootCmd.Flags().StringVar(&rootCmdFlags.infraProvider, "infra-provider", "talemu", "use infra provider with the specified ID when provisioning the machines")
}

// withContext wraps with CLI context.
Expand Down
6 changes: 2 additions & 4 deletions hack/test/integration.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,17 +129,15 @@ KERNEL_ARGS="siderolink.api=grpc://$LOCAL_IP:8090?jointoken=${JOIN_TOKEN} talos.
if [[ "${RUN_TALEMU_TESTS:-false}" == "true" ]]; then
PROMETHEUS_CONTAINER=$(docker run --network host -p "9090:9090" -v "$(pwd)/hack/compose/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml" -it --rm -d prom/prometheus)

TALEMU_CONTAINER=$(docker run --network host --cap-add=NET_ADMIN -it --rm -d ghcr.io/siderolabs/talemu:latest --kernel-args="${KERNEL_ARGS}" --machines=30)

sleep 10
TALEMU_CONTAINER=$(docker run --network host --cap-add=NET_ADMIN -it --rm -d ghcr.io/siderolabs/talemu-infra-provider:latest --create-service-account --omni-api-endpoint=https://$LOCAL_IP:8099)

SSL_CERT_DIR=hack/certs:/etc/ssl/certs \
${ARTIFACTS}/integration-test-linux-amd64 \
--endpoint https://my-instance.localhost:8099 \
--talos-version=${TALOS_VERSION} \
--omnictl-path=${ARTIFACTS}/omnictl-linux-amd64 \
--expected-machines=30 \
--cleanup-links \
--provision-machines=30 \
--run-stats-check \
-t 4m \
-p 10 \
Expand Down