Skip to content

Commit

Permalink
[initializer] Add digest support and unit tests
Browse files Browse the repository at this point in the history
to the new FileDownloadInitializer
  • Loading branch information
csweichel committed Jun 15, 2021
1 parent c10fb46 commit 5436968
Show file tree
Hide file tree
Showing 7 changed files with 231 additions and 52 deletions.
1 change: 1 addition & 0 deletions components/content-service/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ require (
github.com/prometheus/client_golang v1.9.0
github.com/spf13/cobra v1.1.1
golang.org/x/oauth2 v0.0.0-20210427180440-81ed05c6b58c
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1
google.golang.org/api v0.46.0
google.golang.org/grpc v1.37.0
Expand Down
1 change: 1 addition & 0 deletions components/content-service/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,7 @@ golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ=
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
Expand Down
103 changes: 65 additions & 38 deletions components/content-service/pkg/initializer/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ package initializer
import (
"context"
"io"
"io/ioutil"
"net/http"
"os"
"path/filepath"
Expand All @@ -17,83 +16,111 @@ import (
"github.com/gitpod-io/gitpod/common-go/tracing"
csapi "github.com/gitpod-io/gitpod/content-service/api"
"github.com/gitpod-io/gitpod/content-service/pkg/archive"
"github.com/opencontainers/go-digest"
"github.com/opentracing/opentracing-go"
"golang.org/x/sync/errgroup"
"golang.org/x/xerrors"
)

type FileInfo struct {
url string
// filePath is relative to the FileDownloadInitializer's TargetLocation, e.g. if TargetLocation is in `/workspace/myrepo`
// a filePath of `foobar/file` would produce a file in `/workspace/myrepo/foobar/file`.
// filePath must include the filename. The FileDownloadInitializer will create any parent directories
type fileInfo struct {
URL string

// Path is relative to the FileDownloadInitializer's TargetLocation, e.g. if TargetLocation is in `/workspace/myrepo`
// a Path of `foobar/file` would produce a file in `/workspace/myrepo/foobar/file`.
// Path must include the filename. The FileDownloadInitializer will create any parent directories
// necessary to place the file.
filePath string
// digest is a hash of the file content in the OCI digest format (see https://github.com/opencontainers/image-spec/blob/master/descriptor.md#digests).
Path string

// Digest is a hash of the file content in the OCI digest format (see https://github.com/opencontainers/image-spec/blob/master/descriptor.md#digests).
// This information is used to compute subsequent
// content versions, and to validate the file content was downloaded correctly.
digest string
Digest digest.Digest
}

type FileDownloadInitializer struct {
FilesInfos []FileInfo
type fileDownloadInitializer struct {
FilesInfos []fileInfo
TargetLocation string
HTTPClient *http.Client
RetryTimeout time.Duration
}

// Run initializes the workspace
func (ws *FileDownloadInitializer) Run(ctx context.Context, mappings []archive.IDMapping) (src csapi.WorkspaceInitSource, err error) {
func (ws *fileDownloadInitializer) Run(ctx context.Context, mappings []archive.IDMapping) (src csapi.WorkspaceInitSource, err error) {
span, ctx := opentracing.StartSpanFromContext(ctx, "FileDownloadInitializer.Run")
defer tracing.FinishSpan(span, &err)

for _, info := range ws.FilesInfos {
contents, err := downloadFile(ctx, info.url)
if err != nil {
tracing.LogError(span, xerrors.Errorf("cannot download file '%s' from '%s': %w", info.filePath, info.url, err))
}

fullPath := filepath.Join(ws.TargetLocation, info.filePath)
err = os.MkdirAll(filepath.Dir(fullPath), 0755)
if err != nil {
tracing.LogError(span, xerrors.Errorf("cannot mkdir %s: %w", filepath.Dir(fullPath), err))
}
err = ioutil.WriteFile(fullPath, contents, 0755)
err := ws.downloadFile(ctx, info)
if err != nil {
tracing.LogError(span, xerrors.Errorf("cannot write %s: %w", fullPath, err))
tracing.LogError(span, xerrors.Errorf("cannot download file '%s' from '%s': %w", info.Path, info.URL, err))
return src, err
}
}
return src, nil
return csapi.WorkspaceInitFromOther, nil
}

func downloadFile(ctx context.Context, url string) (content []byte, err error) {
func (ws *fileDownloadInitializer) downloadFile(ctx context.Context, info fileInfo) (err error) {
//nolint:ineffassign
span, ctx := opentracing.StartSpanFromContext(ctx, "downloadFile")
defer tracing.FinishSpan(span, &err)
span.LogKV("url", url)
span.LogKV("url", info.URL)

dl := func() (content []byte, err error) {
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
fn := filepath.Join(ws.TargetLocation, info.Path)
err = os.MkdirAll(filepath.Dir(fn), 0755)
if err != nil {
tracing.LogError(span, xerrors.Errorf("cannot mkdir %s: %w", filepath.Dir(fn), err))
}

fd, err := os.OpenFile(fn, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
if err != nil {
return err
}

dl := func() (err error) {
req, err := http.NewRequestWithContext(ctx, "GET", info.URL, nil)
if err != nil {
return nil, err
return err
}
_ = opentracing.GlobalTracer().Inject(span.Context(), opentracing.HTTPHeaders, opentracing.HTTPHeadersCarrier(req.Header))

resp, err := http.DefaultClient.Do(req)
resp, err := ws.HTTPClient.Do(req)
if err != nil {
return nil, err
return err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, xerrors.Errorf("non-OK OTS response: %s", resp.Status)
return xerrors.Errorf("non-OK download response: %s", resp.Status)
}

return io.ReadAll(resp.Body)
pr, pw := io.Pipe()
body := io.TeeReader(resp.Body, pw)

eg, _ := errgroup.WithContext(ctx)
eg.Go(func() error {
_, err = io.Copy(fd, body)
pw.Close()
return err
})
eg.Go(func() error {
dgst, err := digest.FromReader(pr)
if err != nil {
return err
}
if dgst != info.Digest {
return xerrors.Errorf("digest mismatch")
}
return nil
})

return eg.Wait()
}
for i := 0; i < otsDownloadAttempts; i++ {
span.LogKV("attempt", i)
if i > 0 {
time.Sleep(time.Second)
time.Sleep(ws.RetryTimeout)
}

content, err = dl()
err = dl()
if err == context.Canceled || err == context.DeadlineExceeded {
return
}
Expand All @@ -103,8 +130,8 @@ func downloadFile(ctx context.Context, url string) (content []byte, err error) {
log.WithError(err).WithField("attempt", i).Warn("cannot download additional content files")
}
if err != nil {
return nil, err
return err
}

return content, nil
return nil
}
147 changes: 147 additions & 0 deletions components/content-service/pkg/initializer/download_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
// Copyright (c) 2021 Gitpod GmbH. All rights reserved.
// Licensed under the GNU Affero General Public License (AGPL).
// See License-AGPL.txt in the project root for license information.

package initializer

import (
"bytes"
"context"
"io"
"net/http"
"os"
"testing"

"github.com/gitpod-io/gitpod/content-service/api"
"github.com/opencontainers/go-digest"
)

// RoundTripFunc adapts a plain function to the http.RoundTripper interface so
// that tests can stub out the HTTP transport without starting a server.
type RoundTripFunc func(req *http.Request) *http.Response

// RoundTrip implements http.RoundTripper by invoking the wrapped function with
// the request. The returned error is always nil.
func (fn RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
	resp := fn(req)
	return resp, nil
}

// TestFileDownloadInitializer runs the file download initializer against a
// stubbed HTTP transport. Each case lists the files the initializer is asked
// to download, the files the fake server can serve, and the exact error
// message Run is expected to return (empty when Run must succeed).
func TestFileDownloadInitializer(t *testing.T) {
	const defaultContent = "hello world"

	type serverSideFile struct {
		Path    string
		Content string
	}
	tests := []struct {
		Name          string
		Files         []fileInfo
		ServerSide    []serverSideFile
		ExpectedError string
	}{
		{
			Name: "happy path",
			Files: []fileInfo{
				{
					URL:    "/file1",
					Path:   "/level/file1",
					Digest: digest.FromString(defaultContent),
				},
				// duplication is intentional
				{
					URL:    "/file1",
					Path:   "/level/file1",
					Digest: digest.FromString(defaultContent),
				},
				{
					URL:    "/file2",
					Path:   "/level/file2",
					Digest: digest.FromString(defaultContent),
				},
			},
			ServerSide: []serverSideFile{
				{Path: "/file1", Content: defaultContent},
				{Path: "/file2", Content: defaultContent},
			},
		},
		{
			Name: "digest mismatch",
			Files: []fileInfo{
				{
					URL:    "/file1",
					Path:   "/level/file1",
					Digest: digest.FromString(defaultContent + "foobar"),
				},
			},
			ServerSide: []serverSideFile{
				{Path: "/file1", Content: defaultContent},
			},
			ExpectedError: "digest mismatch",
		},
		{
			Name: "file not found",
			Files: []fileInfo{
				{
					URL:    "/file1",
					Path:   "/level/file1",
					Digest: digest.FromString(defaultContent + "foobar"),
				},
			},
			ExpectedError: "non-OK download response: Not Found",
		},
	}

	for _, test := range tests {
		t.Run(test.Name, func(t *testing.T) {
			tmpdir, err := os.MkdirTemp("", "TestFileDownloadInitializer*")
			if err != nil {
				t.Fatal("cannot create tempdir", err)
			}
			defer os.RemoveAll(tmpdir)

			// The fake transport serves the case's ServerSide files by URL path
			// and answers everything else with a 404.
			client := &http.Client{
				Transport: RoundTripFunc(func(req *http.Request) *http.Response {
					for _, f := range test.ServerSide {
						if f.Path != req.URL.Path {
							continue
						}

						return &http.Response{
							StatusCode: http.StatusOK,
							Body:       io.NopCloser(bytes.NewReader([]byte(f.Content))),
							Header:     make(http.Header),
						}
					}

					return &http.Response{
						Status:     http.StatusText(http.StatusNotFound),
						StatusCode: http.StatusNotFound,
						// A RoundTripper must always hand back a non-nil Body.
						Body:   http.NoBody,
						Header: make(http.Header),
					}
				}),
			}

			req := &api.FileDownloadInitializer{}
			for _, f := range test.Files {
				req.Files = append(req.Files, &api.FileDownloadInitializer_FileInfo{
					Url:      "http://foobar" + f.URL,
					FilePath: f.Path,
					Digest:   string(f.Digest),
				})
			}

			initializer, err := newFileDownloadInitializer(tmpdir, req)
			if err != nil {
				t.Fatal(err)
			}
			initializer.HTTPClient = client
			initializer.RetryTimeout = 0

			src, err := initializer.Run(context.Background(), nil)

			// Compare the error message unconditionally: a case that expects an
			// error must fail when Run unexpectedly succeeds, and vice versa.
			var actualError string
			if err != nil {
				actualError = err.Error()
			}
			if actualError != test.ExpectedError {
				t.Fatalf("unexpected error: got %q, want %q", actualError, test.ExpectedError)
			}
			if err == nil && src != api.WorkspaceInitFromOther {
				t.Error("initializer returned wrong content init source")
			}
		})
	}
}
21 changes: 14 additions & 7 deletions components/content-service/pkg/initializer/initializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"strings"
"time"

"github.com/opencontainers/go-digest"
"github.com/opentracing/opentracing-go"
"golang.org/x/xerrors"
"google.golang.org/grpc/codes"
Expand Down Expand Up @@ -145,18 +146,24 @@ func NewFromRequest(ctx context.Context, loc string, rs storage.DirectDownloader
}

// newFileDownloadInitializer creates a download initializer for a request
func newFileDownloadInitializer(loc string, req *csapi.FileDownloadInitializer) (*FileDownloadInitializer, error) {
fileInfos := make([]FileInfo, len(req.Files))
func newFileDownloadInitializer(loc string, req *csapi.FileDownloadInitializer) (*fileDownloadInitializer, error) {
fileInfos := make([]fileInfo, len(req.Files))
for i, f := range req.Files {
fileInfos[i] = FileInfo{
url: f.Url,
filePath: f.FilePath,
digest: f.Digest,
dgst, err := digest.Parse(f.Digest)
if err != nil {
return nil, xerrors.Errorf("invalid digest %s: %w", f.Digest, err)
}
fileInfos[i] = fileInfo{
URL: f.Url,
Path: f.FilePath,
Digest: dgst,
}
}
initializer := &FileDownloadInitializer{
initializer := &fileDownloadInitializer{
FilesInfos: fileInfos,
TargetLocation: filepath.Join(loc, req.TargetLocation),
HTTPClient: http.DefaultClient,
RetryTimeout: 1 * time.Second,
}
return initializer, nil
}
Expand Down
6 changes: 2 additions & 4 deletions components/server/src/workspace/image-source-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { ImageBuilderClientProvider, ResolveBaseImageRequest, BuildRegistryAuthT
import { HostContextProvider } from "../auth/host-context-provider";
import { TraceContext } from "@gitpod/gitpod-protocol/lib/util/tracing";
import { CommitContext, WorkspaceImageSource, WorkspaceConfig, WorkspaceImageSourceReference, WorkspaceImageSourceDocker, ImageConfigFile, ExternalImageConfigFile, User, AdditionalContentContext } from "@gitpod/gitpod-protocol";
import { createHmac } from 'crypto';
import { createHash } from 'crypto';

@injectable()
export class ImageSourceProvider {
Expand Down Expand Up @@ -93,9 +93,7 @@ export class ImageSourceProvider {
}

protected getContentSHA(contents: string): string {
return createHmac('sha256', '')
.update(contents)
.digest('hex');
return createHash('sha256').update(contents).digest('hex');
}


Expand Down
Loading

0 comments on commit 5436968

Please sign in to comment.