Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests: Stop and Wait for workspace at the end of each test #12572

Merged
merged 4 commits into from
Sep 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .werft/workspace-run-integration-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ args=()
args+=( "-kubeconfig=/home/gitpod/.kube/config" )
args+=( "-namespace=default" )
[[ "$USERNAME" != "" ]] && args+=( "-username=$USERNAME" )
args+=( "-timeout=60m" )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this enough time? I ask because the werft job ran for 67 minutes.
https://werft.gitpod-dev.com/job/gitpod-custom-to-stablization-test.17

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is, because these args are passed to each test component.

args+=( "-p=1" )
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this to run each test binary in parallel?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the opposite — this makes the tests run serially.


WK_TEST_LIST=(/workspace/test/tests/components/content-service /workspace/test/tests/components/image-builder /workspace/test/tests/components/ws-daemon /workspace/test/tests/components/ws-manager /workspace/test/tests/workspace)
for TEST_PATH in "${WK_TEST_LIST[@]}"
Expand Down
36 changes: 24 additions & 12 deletions test/pkg/integration/integration.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"net/rpc"
"os"
Expand Down Expand Up @@ -53,14 +55,23 @@ func NewPodExec(config rest.Config, clientset *kubernetes.Clientset) *PodExec {
}

func (p *PodExec) PodCopyFile(src string, dst string, containername string) (*bytes.Buffer, *bytes.Buffer, *bytes.Buffer, error) {
ioStreams, in, out, errOut := genericclioptions.NewTestIOStreams()
copyOptions := kubectlcp.NewCopyOptions(ioStreams)
copyOptions.Clientset = p.Clientset
copyOptions.ClientConfig = p.RestConfig
copyOptions.Container = containername
err := copyOptions.Run([]string{src, dst})
if err != nil {
return nil, nil, nil, fmt.Errorf("Could not run copy operation: %v", err)
var in, out, errOut *bytes.Buffer
var ioStreams genericclioptions.IOStreams
for {
ioStreams, in, out, errOut = genericclioptions.NewTestIOStreams()
copyOptions := kubectlcp.NewCopyOptions(ioStreams)
copyOptions.Clientset = p.Clientset
copyOptions.ClientConfig = p.RestConfig
copyOptions.Container = containername
err := copyOptions.Run([]string{src, dst})
if err != nil {
if !errors.Is(err, io.EOF) {
return nil, nil, nil, fmt.Errorf("Could not run copy operation: %v", err)
}
time.Sleep(10 * time.Second)
continue
}
break
}
return in, out, errOut, nil
}
Expand Down Expand Up @@ -187,20 +198,21 @@ func Instrument(component ComponentType, agentName string, namespace string, kub
}

execErrs := make(chan error, 1)
go func() {
execF := func() {
defer close(execErrs)
_, _, _, execErr := podExec.ExecCmd(cmd, podName, namespace, containerName)
if execErr != nil {
execErrs <- execErr
}
}()
}
go execF()
select {
case err := <-execErrs:
if err != nil {
return nil, closer, err
}
return nil, closer, fmt.Errorf("agent stopped unexepectedly")
case <-time.After(1 * time.Second):
case <-time.After(30 * time.Second):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding is that this is the wait time in between test binaries. Yes? In other words, this is not the wait time for each individual test that we run.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assumed this time is the waiting time for the start-up of the agent.

}

ctx, cancel := context.WithCancel(context.Background())
Expand Down Expand Up @@ -230,7 +242,7 @@ func Instrument(component ComponentType, agentName string, namespace string, kub

var res *rpc.Client
var lastError error
waitErr := wait.PollImmediate(5*time.Second, 2*time.Minute, func() (bool, error) {
waitErr := wait.PollImmediate(5*time.Second, 3*time.Minute, func() (bool, error) {
res, lastError = rpc.DialHTTP("tcp", fmt.Sprintf("localhost:%d", localAgentPort))
if lastError != nil {
return false, nil
Expand Down
122 changes: 56 additions & 66 deletions test/pkg/integration/workspace.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,14 @@ package integration

import (
"context"
"fmt"
"io"
"strings"
"sync"
"time"

"github.com/google/uuid"
"golang.org/x/xerrors"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"

"github.com/gitpod-io/gitpod/common-go/namegen"
csapi "github.com/gitpod-io/gitpod/content-service/api"
Expand All @@ -28,7 +24,7 @@ import (

const (
gitpodBuiltinUserID = "builtin-user-workspace-probe-0000000"
perCallTimeout = 1 * time.Minute
perCallTimeout = 5 * time.Minute
)

type launchWorkspaceDirectlyOptions struct {
Expand Down Expand Up @@ -97,48 +93,53 @@ type LaunchWorkspaceDirectlyResult struct {
// LaunchWorkspaceDirectly starts a workspace pod by talking directly to ws-manager.
// Whenever possible prefer this function over LaunchWorkspaceFromContextURL, because
// it has fewer prerequisites.
func LaunchWorkspaceDirectly(ctx context.Context, api *ComponentAPI, opts ...LaunchWorkspaceDirectlyOpt) (*LaunchWorkspaceDirectlyResult, error) {
func LaunchWorkspaceDirectly(ctx context.Context, api *ComponentAPI, opts ...LaunchWorkspaceDirectlyOpt) (*LaunchWorkspaceDirectlyResult, func(waitForStop bool) error, error) {
options := launchWorkspaceDirectlyOptions{
BaseImage: "docker.io/gitpod/workspace-full:latest",
}
for _, o := range opts {
err := o(&options)
if err != nil {
return nil, err
return nil, nil, err
}
}

instanceID, err := uuid.NewRandom()
if err != nil {
return nil, err
return nil, nil, err

}
workspaceID, err := namegen.GenerateWorkspaceID()
if err != nil {
return nil, err

return nil, nil, err
}

var workspaceImage string
if options.BaseImage != "" {
workspaceImage, err = resolveOrBuildImage(ctx, api, options.BaseImage)
if err != nil {
return nil, xerrors.Errorf("cannot resolve base image: %v", err)
for {
workspaceImage, err = resolveOrBuildImage(ctx, api, options.BaseImage)
if st, ok := status.FromError(err); ok && st.Code() == codes.Unavailable {
time.Sleep(5 * time.Second)
continue
} else if err != nil {
return nil, nil, xerrors.Errorf("cannot resolve base image: %v", err)
}
break
}
}
if workspaceImage == "" {
return nil, xerrors.Errorf("cannot start workspaces without a workspace image (required by registry-facade resolver)")
return nil, nil, xerrors.Errorf("cannot start workspaces without a workspace image (required by registry-facade resolver)")
}

ideImage := options.IdeImage
if ideImage == "" {
cfg, err := GetServerIDEConfig(api.namespace, api.client)
if err != nil {
return nil, xerrors.Errorf("cannot find server IDE config: %q", err)
return nil, nil, xerrors.Errorf("cannot find server IDE config: %q", err)
}
ideImage = cfg.IDEOptions.Options.Code.Image
if ideImage == "" {
return nil, xerrors.Errorf("cannot start workspaces without an IDE image (required by registry-facade resolver)")
return nil, nil, xerrors.Errorf("cannot start workspaces without an IDE image (required by registry-facade resolver)")
}
}

Expand Down Expand Up @@ -173,7 +174,7 @@ func LaunchWorkspaceDirectly(ctx context.Context, api *ComponentAPI, opts ...Lau
for _, m := range options.Mods {
err := m(req)
if err != nil {
return nil, err
return nil, nil, err
}
}

Expand All @@ -182,26 +183,56 @@ func LaunchWorkspaceDirectly(ctx context.Context, api *ComponentAPI, opts ...Lau

wsm, err := api.WorkspaceManager()
if err != nil {
return nil, xerrors.Errorf("cannot start workspace manager: %q", err)
return nil, nil, xerrors.Errorf("cannot start workspace manager: %q", err)
}

sresp, err := wsm.StartWorkspace(sctx, req)
if err != nil {
return nil, xerrors.Errorf("cannot start workspace: %q", err)
return nil, nil, xerrors.Errorf("cannot start workspace: %q", err)
}

stopWs := func(waitForStop bool) error {
tctx, tcancel := context.WithTimeout(context.Background(), perCallTimeout)
defer tcancel()

for {
err = DeleteWorkspace(tctx, api, req.Id)
if st, ok := status.FromError(err); ok && st.Code() == codes.Unavailable {
time.Sleep(5 * time.Second)
continue
} else if err != nil {
return err
}
break
}
for {
_, err = WaitForWorkspaceStop(tctx, api, req.Id)
if st, ok := status.FromError(err); ok && st.Code() == codes.Unavailable {
time.Sleep(5 * time.Second)
continue
} else if err != nil {
return err
}
break
}
return err
}
defer func() {
if err != nil {
stopWs(false)
}
}()

lastStatus, err := WaitForWorkspaceStart(ctx, instanceID.String(), api, options.WaitForOpts...)
if err != nil {
return nil, xerrors.Errorf("cannot wait for workspace start: %q", err)
return nil, nil, xerrors.Errorf("cannot wait for workspace start: %q", err)
}

// it.t.Logf("workspace is running: instanceID=%s", instanceID.String())

return &LaunchWorkspaceDirectlyResult{
Req: req,
IdeURL: sresp.Url,
LastStatus: lastStatus,
}, nil
}, stopWs, nil
}

// LaunchWorkspaceFromContextURL force-creates a new workspace using the Gitpod server API,
Expand Down Expand Up @@ -246,9 +277,6 @@ func LaunchWorkspaceFromContextURL(ctx context.Context, contextURL string, usern
sctx, scancel := context.WithTimeout(ctx, perCallTimeout)
_ = server.StopWorkspace(sctx, resp.CreatedWorkspaceID)
scancel()
//if err != nil {
//it.t.Errorf("cannot stop workspace: %q", err)
//}

if waitForStop {
_, _ = WaitForWorkspaceStop(ctx, api, nfo.LatestInstance.ID)
Expand Down Expand Up @@ -413,7 +441,6 @@ func WaitForWorkspaceStop(ctx context.Context, api *ComponentAPI, instanceID str
_ = sub.CloseSend()
}()

var workspaceID string
done := make(chan struct{})
errCh := make(chan error)
go func() {
Expand All @@ -432,7 +459,6 @@ func WaitForWorkspaceStop(ctx context.Context, api *ComponentAPI, instanceID str
continue
}

workspaceID = status.Metadata.MetaId
if status.Conditions.Failed != "" {
errCh <- xerrors.Errorf("workspace instance %s failed: %s", instanceID, status.Conditions.Failed)
return
Expand All @@ -441,6 +467,8 @@ func WaitForWorkspaceStop(ctx context.Context, api *ComponentAPI, instanceID str
lastStatus = status
return
}

time.Sleep(10 * time.Second)
}
}()

Expand All @@ -462,44 +490,6 @@ func WaitForWorkspaceStop(ctx context.Context, api *ComponentAPI, instanceID str
case <-done:
}

// wait for the Theia service to be properly deleted
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
var (
start = time.Now()
serviceGone bool
)

// NOTE: this needs to be kept in sync with components/ws-manager/pkg/manager/manager.go:getTheiaServiceName()
// TODO(rl) expose it?
theiaName := fmt.Sprintf("ws-%s-theia", strings.TrimSpace(strings.ToLower(workspaceID)))
for time.Since(start) < 1*time.Minute {
var svc corev1.Service
err := api.client.Resources().Get(ctx, fmt.Sprintf("ws-%s-theia", workspaceID), api.namespace, &svc)
if errors.IsNotFound(err) {
serviceGone = true
break
}
time.Sleep(200 * time.Millisecond)
}
if !serviceGone {
return nil, xerrors.Errorf("workspace service did not disappear in time (theia)")
}
// Wait for the theia endpoints to be properly deleted (i.e. syncing)
var endpointGone bool
for time.Since(start) < 1*time.Minute {
var svc corev1.Endpoints
err := api.client.Resources().Get(ctx, theiaName, api.namespace, &svc)
if errors.IsNotFound(err) {
endpointGone = true
break
}
time.Sleep(200 * time.Millisecond)
}
if !endpointGone {
return nil, xerrors.Errorf("Theia endpoint:%s did not disappear in time", theiaName)
}

return
}

Expand Down
Loading