Skip to content

Commit

Permalink
Add timeout to the e2e command (aws#5649)
Browse files Browse the repository at this point in the history
Co-authored-by: Abhinav Pandey <abhinavmpandey08@gmail.com>
  • Loading branch information
eks-distro-pr-bot and abhinavmpandey08 authored Apr 14, 2023
1 parent b6468dc commit 156d873
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 29 deletions.
36 changes: 22 additions & 14 deletions internal/pkg/ssm/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ package ssm

import (
"fmt"
"math"
"strconv"
"time"

"github.com/aws/aws-sdk-go/aws"
Expand All @@ -17,9 +19,10 @@ const ssmLogGroup = "/eks-anywhere/test/e2e"

var initE2EDirCommand = "mkdir -p /home/e2e/bin && cd /home/e2e"

func WaitForSSMReady(session *session.Session, instanceId string) error {
// WaitForSSMReady waits for the SSM command to be ready.
func WaitForSSMReady(session *session.Session, instanceID string, timeout time.Duration) error {
err := retrier.Retry(10, 20*time.Second, func() error {
return Run(session, logr.Discard(), instanceId, "ls")
return Run(session, logr.Discard(), instanceID, "ls", timeout)
})
if err != nil {
return fmt.Errorf("waiting for ssm to be ready: %v", err)
Expand Down Expand Up @@ -53,8 +56,9 @@ var nonFinalStatuses = map[string]struct{}{
ssm.CommandInvocationStatusInProgress: {}, ssm.CommandInvocationStatusDelayed: {}, ssm.CommandInvocationStatusPending: {},
}

func Run(session *session.Session, logger logr.Logger, instanceId, command string, opts ...CommandOpt) error {
o, err := RunCommand(session, logger, instanceId, command, opts...)
// Run runs the command using SSM on the instance corresponding to the instanceID.
func Run(session *session.Session, logger logr.Logger, instanceID, command string, timeout time.Duration, opts ...CommandOpt) error {
o, err := RunCommand(session, logger, instanceID, command, timeout, opts...)
if err != nil {
return err
}
Expand All @@ -65,21 +69,22 @@ func Run(session *session.Session, logger logr.Logger, instanceId, command strin
return nil
}

func RunCommand(session *session.Session, logger logr.Logger, instanceId, command string, opts ...CommandOpt) (*RunOutput, error) {
// RunCommand runs the command using SSM on the instance corresponding to the instanceID.
func RunCommand(session *session.Session, logger logr.Logger, instanceID, command string, timeout time.Duration, opts ...CommandOpt) (*RunOutput, error) {
service := ssm.New(session)

result, err := sendCommand(service, logger, instanceId, command, opts...)
result, err := sendCommand(service, logger, instanceID, command, timeout, opts...)
if err != nil {
return nil, err
}

commandIn := &ssm.GetCommandInvocationInput{
CommandId: result.Command.CommandId,
InstanceId: aws.String(instanceId),
InstanceId: aws.String(instanceID),
}

// Make sure ssm send command is registered
logger.Info("Waiting for ssm command to be registered", "commandId", commandIn.CommandId, "instanceId", commandIn.InstanceId)
logger.Info("Waiting for ssm command to be registered", "commandId", commandIn.CommandId, "instanceID", commandIn.InstanceId)
err = retrier.Retry(10, 5*time.Second, func() error {
_, err := service.GetCommandInvocation(commandIn)
if err != nil {
Expand All @@ -94,7 +99,10 @@ func RunCommand(session *session.Session, logger logr.Logger, instanceId, comman

logger.Info("Waiting for ssm command to finish")
var commandOut *ssm.GetCommandInvocationOutput
r := retrier.New(300*time.Minute, retrier.WithMaxRetries(2160, 60*time.Second))

// Making the retrier wait for longer than the provided SSM timeout to make sure
// we always get the output results.
r := retrier.New(timeout+5*time.Minute, retrier.WithMaxRetries(math.MaxInt, 60*time.Second))
err = r.Retry(func() error {
var err error
commandOut, err = service.GetCommandInvocation(commandIn)
Expand All @@ -117,19 +125,19 @@ func RunCommand(session *session.Session, logger logr.Logger, instanceId, comman
return buildRunOutput(commandOut), nil
}

func sendCommand(service *ssm.SSM, logger logr.Logger, instanceId, command string, opts ...CommandOpt) (*ssm.SendCommandOutput, error) {
func sendCommand(service *ssm.SSM, logger logr.Logger, instanceID, command string, timeout time.Duration, opts ...CommandOpt) (*ssm.SendCommandOutput, error) {
in := &ssm.SendCommandInput{
DocumentName: aws.String("AWS-RunShellScript"),
InstanceIds: []*string{aws.String(instanceId)},
Parameters: map[string][]*string{"commands": {aws.String(initE2EDirCommand), aws.String(command)}, "executionTimeout": {aws.String("18000")}},
InstanceIds: []*string{aws.String(instanceID)},
Parameters: map[string][]*string{"commands": {aws.String(initE2EDirCommand), aws.String(command)}, "executionTimeout": {aws.String(strconv.FormatFloat(timeout.Seconds(), 'f', 0, 64))}},
}

for _, opt := range opts {
opt(in)
}

var result *ssm.SendCommandOutput
r := retrier.New(300*time.Minute, retrier.WithRetryPolicy(func(totalRetries int, err error) (retry bool, wait time.Duration) {
r := retrier.New(timeout, retrier.WithRetryPolicy(func(totalRetries int, err error) (retry bool, wait time.Duration) {
if request.IsErrorThrottle(err) && totalRetries < 60 {
return true, 60 * time.Second
}
Expand All @@ -152,7 +160,7 @@ func sendCommand(service *ssm.SSM, logger logr.Logger, instanceId, command strin
if in.OutputS3BucketName != nil {
logger.V(4).Info(
"SSM command output to S3", "url",
fmt.Sprintf("s3://%s/%s/%s/%s/awsrunShellScript/0.awsrunShellScript/stderr", *in.OutputS3BucketName, *in.OutputS3KeyPrefix, *result.Command.CommandId, instanceId),
fmt.Sprintf("s3://%s/%s/%s/%s/awsrunShellScript/0.awsrunShellScript/stderr", *in.OutputS3BucketName, *in.OutputS3KeyPrefix, *result.Command.CommandId, instanceID),
)
}

Expand Down
6 changes: 3 additions & 3 deletions internal/test/e2e/artifacts.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func (e *E2ESession) uploadGeneratedFilesFromInstance(testName string) {
e.generatedArtifactsBucketPath(), testName,
).recursive().String()

if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
e.logger.Error(err, "error uploading log files from instance")
} else {
e.logger.V(1).Info("Successfully uploaded log files to S3")
Expand All @@ -34,7 +34,7 @@ func (e *E2ESession) uploadDiagnosticArchiveFromInstance(testName string) {
e.generatedArtifactsBucketPath(), testName,
).recursive().exclude("*").include(bundleNameFormat).String()

if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
e.logger.Error(err, "error uploading diagnostic bundle from instance")
} else {
e.logger.V(1).Info("Successfully uploaded diagnostic bundle files to S3")
Expand All @@ -48,7 +48,7 @@ func (e *E2ESession) uploadJUnitReportFromInstance(testName string) {
e.generatedArtifactsBucketPath(), testName,
).recursive().exclude("*").include(junitFile).String()

if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
e.logger.Error(err, "error uploading JUnit report from instance")
} else {
e.logger.V(1).Info("Successfully uploaded JUnit report files to S3")
Expand Down
6 changes: 3 additions & 3 deletions internal/test/e2e/fluxGit.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func (e *E2ESession) writeFileToInstance(file fileFromBytes) error {
e.logger.V(1).Info("Writing bytes to file in instance", "file", file.dstPath)

command := fmt.Sprintf("echo $'%s' >> %s && chmod %d %[2]s", file.contentString(), file.dstPath, file.permission)
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
return fmt.Errorf("writing file in instance: %v", err)
}
e.logger.V(1).Info("Successfully wrote file", "file", file.dstPath)
Expand All @@ -94,7 +94,7 @@ func (e *E2ESession) downloadFileInInstance(file s3Files) error {
e.logger.V(1).Info("Downloading from s3 in instance", "file", file.key)

command := fmt.Sprintf("aws s3 cp s3://%s/%s %s && chmod %d %[3]s", e.storageBucket, file.key, file.dstPath, file.permission)
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
return fmt.Errorf("downloading file in instance: %v", err)
}
e.logger.V(1).Info("Successfully downloaded file", "file", file.key)
Expand All @@ -105,7 +105,7 @@ func (e *E2ESession) downloadFileInInstance(file s3Files) error {
func (e *E2ESession) setUpSshAgent(privateKeyFile string) error {
command := fmt.Sprintf("eval $(ssh-agent -s) ssh-add %s", privateKeyFile)

if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
return fmt.Errorf("starting SSH agent on instance: %v", err)
}
e.logger.V(1).Info("Successfully started SSH agent on instance")
Expand Down
2 changes: 1 addition & 1 deletion internal/test/e2e/oidc.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ func (e *E2ESession) downloadSignerKeyInInstance(folder string) (pathInInstance
e.logger.V(1).Info("Downloading from s3 in instance", "key", saSignerKey)
command := fmt.Sprintf("aws s3 cp s3://%s/%s ./%s", e.storageBucket, saSignerKey, saSignerPath)

if err = ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err = ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
return "", fmt.Errorf("downloading signer key in instance: %v", err)
}
e.logger.V(1).Info("Successfully downloaded signer key")
Expand Down
4 changes: 2 additions & 2 deletions internal/test/e2e/registryMirror.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func (e *E2ESession) setupRegistryMirrorEnv(testRegex string) error {
func (e *E2ESession) mountRegistryCert(cert string, endpoint string) error {
command := fmt.Sprintf("sudo mkdir -p /etc/docker/certs.d/%s", endpoint)

if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
return fmt.Errorf("creating directory in instance: %v", err)
}
decodedCert, err := base64.StdEncoding.DecodeString(cert)
Expand All @@ -86,7 +86,7 @@ func (e *E2ESession) mountRegistryCert(cert string, endpoint string) error {
}
command = fmt.Sprintf("sudo cat <<EOF>> /etc/docker/certs.d/%s/ca.crt\n%s\nEOF", endpoint, string(decodedCert))

if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
return fmt.Errorf("mounting certificate in instance: %v", err)
}

Expand Down
11 changes: 10 additions & 1 deletion internal/test/e2e/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strconv"
"strings"
"sync"
"time"

"github.com/aws/aws-sdk-go/aws/session"
"github.com/go-logr/logr"
Expand All @@ -24,6 +25,13 @@ const (

maxIPPoolSize = 10
minIPPoolSize = 1

// Default timeout for E2E test instance.
e2eTimeout = 300 * time.Minute
e2eSSMTimeoutPadding = 10 * time.Minute

// Default timeout used for all SSM commands besides running the actual E2E test.
ssmTimeout = 10 * time.Minute
)

type ParallelRunConf struct {
Expand Down Expand Up @@ -205,7 +213,7 @@ func (e *E2ESession) runTests(regex string) (testCommandResult *testCommandResul
command := "GOVERSION=go1.16.6 gotestsum --junitfile=junit-testing.xml --raw-command --format=standard-verbose --hide-summary=all --ignore-non-json-output-lines -- test2json -t -p e2e ./bin/e2e.test -test.v"

if regex != "" {
command = fmt.Sprintf("%s -test.run \"%s\"", command, regex)
command = fmt.Sprintf("%s -test.run \"%s\" -test.timeout %s", command, regex, e2eTimeout)
}

command = e.commandWithEnvVars(command)
Expand All @@ -217,6 +225,7 @@ func (e *E2ESession) runTests(regex string) (testCommandResult *testCommandResul
e.logger.V(4),
e.instanceId,
command,
e2eTimeout+e2eSSMTimeoutPadding,
opt,
)
if err != nil {
Expand Down
8 changes: 4 additions & 4 deletions internal/test/e2e/setup.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func (e *E2ESession) setup(regex string) error {
}

e.logger.V(1).Info("Waiting until SSM is ready")
err = ssm.WaitForSSMReady(e.session, e.instanceId)
err = ssm.WaitForSSMReady(e.session, e.instanceId, ssmTimeout)
if err != nil {
return fmt.Errorf("waiting for ssm in new instance: %v", err)
}
Expand Down Expand Up @@ -184,7 +184,7 @@ func (e *E2ESession) setup(regex string) error {
func (e *E2ESession) updateFSInotifyResources() error {
command := fmt.Sprintf("sudo sysctl fs.inotify.max_user_watches=%v && sudo sysctl fs.inotify.max_user_instances=%v", maxUserWatches, maxUserInstances)

if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
return fmt.Errorf("updating fs inotify resources: %v", err)
}
e.logger.V(1).Info("Successfully updated the fs inotify user watches and instances")
Expand Down Expand Up @@ -230,7 +230,7 @@ func (e *E2ESession) downloadRequiredFileInInstance(file string) error {
e.logger.V(1).Info("Downloading from s3 in instance", "file", file)

command := fmt.Sprintf("aws s3 cp s3://%s/%s/%[3]s ./bin/ && chmod 645 ./bin/%[3]s", e.storageBucket, e.jobId, file)
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
return fmt.Errorf("downloading file in instance: %v", err)
}
e.logger.V(1).Info("Successfully downloaded file")
Expand All @@ -251,7 +251,7 @@ func (e *E2ESession) downloadRequiredFilesInInstance() error {
func (e *E2ESession) createTestNameFile(testName string) error {
command := fmt.Sprintf("echo \"%s\" > %s", testName, testNameFile)

if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
return fmt.Errorf("creating test name file in instance: %v", err)
}
e.logger.V(1).Info("Successfully created test name file")
Expand Down
2 changes: 1 addition & 1 deletion internal/test/e2e/tinkerbell.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func (e *E2ESession) setTinkerbellBootstrapIPInInstance(tinkInterface string) er
e.logger.V(1).Info("Setting Tinkerbell Bootstrap IP in instance")

command := fmt.Sprintf("export T_TINKERBELL_BOOTSTRAP_IP=$(/sbin/ip -o -4 addr list %s | awk '{print $4}' | cut -d/ -f1) && echo T_TINKERBELL_BOOTSTRAP_IP=\"$T_TINKERBELL_BOOTSTRAP_IP\" | tee -a /etc/environment", tinkInterface)
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command); err != nil {
if err := ssm.Run(e.session, logr.Discard(), e.instanceId, command, ssmTimeout); err != nil {
return fmt.Errorf("setting tinkerbell boostrap ip: %v", err)
}

Expand Down

0 comments on commit 156d873

Please sign in to comment.