From b727eadb9fdbe1691f1f0b5d8c2dd8335c9550d3 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Fri, 24 Mar 2023 14:30:45 +0100 Subject: [PATCH] pkg/debuginfod: Fix caching and follow redirects of debuginfod servers --- pkg/debuginfo/debuginfod.go | 103 +++++++++++++------------ pkg/debuginfo/debuginfod_test.go | 125 +++++++++++++++++++++++++++---- pkg/debuginfo/store.go | 6 +- pkg/debuginfo/store_test.go | 37 +++++---- 4 files changed, 188 insertions(+), 83 deletions(-) diff --git a/pkg/debuginfo/debuginfod.go b/pkg/debuginfo/debuginfod.go index 5574ef6aa71..a952b5f6842 100644 --- a/pkg/debuginfo/debuginfod.go +++ b/pkg/debuginfo/debuginfod.go @@ -15,6 +15,7 @@ package debuginfo import ( "bytes" + "errors" "fmt" "io" "net/http" @@ -74,6 +75,7 @@ func NewHTTPDebuginfodClient(logger log.Logger, serverURLs []string, timeoutDura parsedURLs = append(parsedURLs, u) } + return &HTTPDebuginfodClient{ logger: logger, upstreamServers: parsedURLs, @@ -90,44 +92,37 @@ func NewDebuginfodClientWithObjectStorageCache(logger log.Logger, bucket objstor }, nil } -type closer func() error +// Get returns debuginfo for given buildid while caching it in object storage. +func (c *DebuginfodClientObjectStorageCache) Get(ctx context.Context, buildID string) (io.ReadCloser, error) { + rc, err := c.bucket.Get(ctx, objectPath(buildID)) + if err != nil { + if c.bucket.IsObjNotFoundErr(err) { + return c.getAndCache(ctx, buildID) + } -func (f closer) Close() error { return f() } + return nil, err + } -type readCloser struct { - io.Reader - closer + return rc, nil } -// Get returns debuginfo for given buildid while caching it in object storage. -func (c *DebuginfodClientObjectStorageCache) Get(ctx context.Context, buildID string) (io.ReadCloser, error) { - debuginfo, err := c.client.Get(ctx, buildID) +func (c *DebuginfodClientObjectStorageCache) getAndCache(ctx context.Context, buildID string) (io.ReadCloser, error) { + r, err := c.client.Get(ctx, buildID) if err != nil { return nil, err } + defer r.Close() - r, w := io.Pipe() - go func() { - defer w.Close() - defer debuginfo.Close() - - // TODO(kakkoyun): Use store.upload() to upload the debuginfo to object storage. - if err := c.bucket.Upload(ctx, objectPath(buildID), r); err != nil { - level.Error(c.logger).Log("msg", "failed to upload downloaded debuginfod file", "err", err, "build_id", buildID) - } - }() + if err := c.bucket.Upload(ctx, objectPath(buildID), r); err != nil { + level.Error(c.logger).Log("msg", "failed to upload downloaded debuginfod file", "err", err, "build_id", buildID) + } - return readCloser{ - Reader: io.TeeReader(debuginfo, w), - closer: closer(func() error { - defer debuginfo.Close() + r, err = c.bucket.Get(ctx, objectPath(buildID)) + if err != nil { + return nil, err + } - if err := w.Close(); err != nil { - return err - } - return nil - }), - }, nil + return r, nil } // Exists returns true if debuginfo for given buildid exists. @@ -157,14 +152,7 @@ func (c *HTTPDebuginfodClient) Get(ctx context.Context, buildID string) (io.Read // "https://debuginfod.archlinux.org/" // "https://debuginfod.centos.org/" for _, u := range c.upstreamServers { - serverURL := *u - rc, err := func(serverURL url.URL) (io.ReadCloser, error) { - rc, err := c.request(ctx, serverURL, buildID) - if err != nil { - return nil, err - } - return rc, nil - }(serverURL) + rc, err := c.request(ctx, *u, buildID) if err != nil { continue } @@ -204,17 +192,38 @@ func (c *HTTPDebuginfodClient) request(ctx context.Context, u url.URL, buildID s return nil, fmt.Errorf("request failed: %w", err) } - switch resp.StatusCode / 100 { - case 2: - return resp.Body, nil - case 4: - if resp.StatusCode == http.StatusNotFound { - return nil, ErrDebuginfoNotFound + return c.handleResponse(ctx, resp) +} + +func (c *HTTPDebuginfodClient) handleResponse(ctx context.Context, resp *http.Response) (io.ReadCloser, error) { + // Follow at most 2 redirects. + for i := 0; i < 2; i++ { + switch resp.StatusCode / 100 { + case 2: + return resp.Body, nil + case 3: + req, err := http.NewRequestWithContext(ctx, "GET", resp.Header.Get("Location"), nil) + if err != nil { + return nil, fmt.Errorf("create request: %w", err) + } + + resp, err = c.client.Do(req) + if err != nil { + return nil, fmt.Errorf("request failed: %w", err) + } + + continue + case 4: + if resp.StatusCode == http.StatusNotFound { + return nil, ErrDebuginfoNotFound + } + return nil, fmt.Errorf("client error: %s", resp.Status) + case 5: + return nil, fmt.Errorf("server error: %s", resp.Status) + default: + return nil, fmt.Errorf("unexpected status code: %s", resp.Status) } - return nil, fmt.Errorf("client error: %s", resp.Status) - case 5: - return nil, fmt.Errorf("server error: %s", resp.Status) - default: - return nil, fmt.Errorf("unexpected status code: %s", resp.Status) } + + return nil, errors.New("too many redirects") } diff --git a/pkg/debuginfo/debuginfod_test.go b/pkg/debuginfo/debuginfod_test.go index 3f67d090105..45fec702f7c 100644 --- a/pkg/debuginfo/debuginfod_test.go +++ b/pkg/debuginfo/debuginfod_test.go @@ -15,8 +15,12 @@ package debuginfo import ( + "bytes" + "errors" + "fmt" "io" "net/http" + "net/http/httptest" "net/url" "os" "os/exec" @@ -26,6 +30,7 @@ import ( "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" "golang.org/x/net/context" "gopkg.in/dnaeon/go-vcr.v3/recorder" ) @@ -100,25 +105,115 @@ func TestHTTPDebugInfodClient_request(t *testing.T) { os.Remove(tmpfile.Name()) }) - _, err = io.Copy(tmpfile, r) - require.NoError(t, err) + downloadAndCompare(t, r, tt.want) + }) + } +} - require.NoError(t, tmpfile.Close()) +func downloadAndCompare(t *testing.T, r io.ReadCloser, want string) { + t.Helper() - cmd := exec.Command("file", tmpfile.Name()) + tmpfile, err := os.CreateTemp("", "debuginfod-download-*") + require.NoError(t, err) - stdout, err := cmd.Output() - require.NoError(t, err) + t.Cleanup(func() { + os.Remove(tmpfile.Name()) + }) - got := strings.TrimSpace(strings.Split(string(stdout), ":")[1]) + _, err = io.Copy(tmpfile, r) + require.NoError(t, err) - // For some reason the output of the `file` command is not always - // consistent across architectures, and in the amd64 case even - // inserts an escaped tab causing the string to contain `\011`. So - // we remove the inconsistencies and ten compare output strings. - got = strings.ReplaceAll(got, "\t", "") - got = strings.ReplaceAll(got, "\\011", "") - require.Equal(t, tt.want, got) - }) + require.NoError(t, tmpfile.Close()) + + cmd := exec.Command("file", tmpfile.Name()) + + stdout, err := cmd.Output() + require.NoError(t, err) + + got := strings.TrimSpace(strings.Split(string(stdout), ":")[1]) + + // For some reason the output of the `file` command is not always + // consistent across architectures, and in the amd64 case even + // inserts an escaped tab causing the string to contain `\011`. So + // we remove the inconsistencies and ten compare output strings. + got = strings.ReplaceAll(got, "\t", "") + got = strings.ReplaceAll(got, "\\011", "") + require.Equal(t, want, got) +} + +func TestHTTPDebugInfodClientRedirect(t *testing.T) { + ds := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, "test") + })) + defer ds.Close() + + rs := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Redirect(w, r, ds.URL+r.URL.Path, http.StatusFound) + })) + defer rs.Close() + + c, err := NewHTTPDebuginfodClient(log.NewNopLogger(), []string{rs.URL}, 30*time.Second) + require.NoError(t, err) + + ctx := context.Background() + r, err := c.Get(ctx, "d278249792061c6b74d1693ca59513be1def13f2") + require.NoError(t, err) + require.NotNil(t, r) + + content, err := io.ReadAll(r) + require.NoError(t, err) + + require.Equal(t, "test", string(content)) +} + +type fakeDebuginfodClient struct { + get func(ctx context.Context, buildID string) (io.ReadCloser, error) + exists func(ctx context.Context, buildID string) (bool, error) +} + +func (f *fakeDebuginfodClient) Get(ctx context.Context, buildID string) (io.ReadCloser, error) { + return f.get(ctx, buildID) +} + +func (f *fakeDebuginfodClient) Exists(ctx context.Context, buildID string) (bool, error) { + return f.exists(ctx, buildID) +} + +type nopCloser struct { + io.Reader +} + +func (nopCloser) Close() error { return nil } + +func TestHTTPDebugInfodCache(t *testing.T) { + c := &fakeDebuginfodClient{ + get: func(ctx context.Context, buildID string) (io.ReadCloser, error) { + return nopCloser{bytes.NewBuffer([]byte("test"))}, nil + }, } + + cache, err := NewDebuginfodClientWithObjectStorageCache( + log.NewNopLogger(), + objstore.NewInMemBucket(), + c, + ) + require.NoError(t, err) + + ctx := context.Background() + r, err := cache.Get(ctx, "test") + require.NoError(t, err) + content, err := io.ReadAll(r) + require.NoError(t, err) + require.Equal(t, "test", string(content)) + + // Test that the cache works. + c.get = func(ctx context.Context, buildID string) (io.ReadCloser, error) { + return nil, errors.New("should not be called") + } + + r, err = cache.Get(ctx, "test") + require.NoError(t, err) + content, err = io.ReadAll(r) + require.NoError(t, err) + require.Equal(t, "test", string(content)) } diff --git a/pkg/debuginfo/store.go b/pkg/debuginfo/store.go index e39da583915..dc8a7cdf2c3 100644 --- a/pkg/debuginfo/store.go +++ b/pkg/debuginfo/store.go @@ -150,10 +150,14 @@ func (s *Store) ShouldInitiateUpload(ctx context.Context, req *debuginfopb.Shoul } if existsInDebuginfod { + if err := s.metadata.MarkAsDebuginfodSource(ctx, buildID); err != nil { + return nil, status.Error(codes.Internal, fmt.Errorf("mark Build ID to be available from debuginfod: %w", err).Error()) + } + return &debuginfopb.ShouldInitiateUploadResponse{ ShouldInitiateUpload: false, Reason: ReasonDebuginfoInDebuginfod, - }, s.metadata.MarkAsDebuginfodSource(ctx, buildID) + }, nil } return &debuginfopb.ShouldInitiateUploadResponse{ diff --git a/pkg/debuginfo/store_test.go b/pkg/debuginfo/store_test.go index a6065575445..b324cd368b0 100644 --- a/pkg/debuginfo/store_test.go +++ b/pkg/debuginfo/store_test.go @@ -32,22 +32,21 @@ import ( debuginfopb "github.com/parca-dev/parca/gen/proto/go/parca/debuginfo/v1alpha1" ) -type fakeDebuginfodClient struct { - items map[string]io.ReadCloser -} - -func (c *fakeDebuginfodClient) Get(ctx context.Context, buildid string) (io.ReadCloser, error) { - item, ok := c.items[buildid] - if !ok { - return nil, ErrDebuginfoNotFound +func newFakeDebuginfodClientWithItems(items map[string]io.ReadCloser) *fakeDebuginfodClient { + return &fakeDebuginfodClient{ + get: func(ctx context.Context, buildid string) (io.ReadCloser, error) { + item, ok := items[buildid] + if !ok { + return nil, ErrDebuginfoNotFound + } + + return item, nil + }, + exists: func(ctx context.Context, buildid string) (bool, error) { + _, ok := items[buildid] + return ok, nil + }, } - - return item, nil -} - -func (c *fakeDebuginfodClient) Exists(ctx context.Context, buildid string) (bool, error) { - _, ok := c.items[buildid] - return ok, nil } func TestStore(t *testing.T) { @@ -63,11 +62,9 @@ func TestStore(t *testing.T) { logger, metadata, bucket, - &fakeDebuginfodClient{ - items: map[string]io.ReadCloser{ - "deadbeef": io.NopCloser(bytes.NewBufferString("debuginfo1")), - }, - }, + newFakeDebuginfodClientWithItems(map[string]io.ReadCloser{ + "deadbeef": io.NopCloser(bytes.NewBufferString("debuginfo1")), + }), SignedUpload{ Enabled: false, },