From 5854dbee73e7daccb21ce7d3449ea5ee5c40c162 Mon Sep 17 00:00:00 2001 From: Jagpreet Singh Tamber Date: Wed, 3 Apr 2024 13:56:55 -0400 Subject: [PATCH] Sanitize URLs for bucket fetch error messages. Co-authored-by: Jagpreet Singh Tamber Signed-off-by: Stefan Prodan --- internal/controller/bucket_controller.go | 4 +- internal/error/sanitized.go | 76 ++++++++++++ internal/error/sanitized_test.go | 141 +++++++++++++++++++++++ 3 files changed, 219 insertions(+), 2 deletions(-) create mode 100644 internal/error/sanitized.go create mode 100644 internal/error/sanitized_test.go diff --git a/internal/controller/bucket_controller.go b/internal/controller/bucket_controller.go index c5c3267d2..f12319e62 100644 --- a/internal/controller/bucket_controller.go +++ b/internal/controller/bucket_controller.go @@ -728,7 +728,7 @@ func fetchEtagIndex(ctx context.Context, provider BucketProvider, obj *bucketv1. path := filepath.Join(tempDir, sourceignore.IgnoreFile) if _, err := provider.FGetObject(ctxTimeout, obj.Spec.BucketName, sourceignore.IgnoreFile, path); err != nil { if !provider.ObjectIsNotFound(err) { - return err + return fmt.Errorf("failed to get Etag for '%s' object: %w", sourceignore.IgnoreFile, serror.SanitizeError(err)) } } ps, err := sourceignore.ReadIgnoreFile(path, nil) @@ -792,7 +792,7 @@ func fetchIndexFiles(ctx context.Context, provider BucketProvider, obj *bucketv1 index.Delete(k) return nil } - return fmt.Errorf("failed to get '%s' object: %w", k, err) + return fmt.Errorf("failed to get '%s' object: %w", k, serror.SanitizeError(err)) } if t != etag { index.Add(k, etag) diff --git a/internal/error/sanitized.go b/internal/error/sanitized.go new file mode 100644 index 000000000..04f6ccf92 --- /dev/null +++ b/internal/error/sanitized.go @@ -0,0 +1,76 @@ +/* +Copyright 2024 The Flux authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package error + +import ( + "fmt" + "net/url" + "regexp" +) + +type SanitizedError struct { + err string +} + +func (e SanitizedError) Error() string { + return e.err +} + +// SanitizeError extracts all URLs from the error message +// and replaces them with the URL without the query string. +func SanitizeError(err error) SanitizedError { + errorMessage := err.Error() + for _, u := range extractURLs(errorMessage) { + urlWithoutQueryString, err := removeQueryString(u) + if err == nil { + re, err := regexp.Compile(fmt.Sprintf("%s*", regexp.QuoteMeta(u))) + if err == nil { + errorMessage = re.ReplaceAllString(errorMessage, urlWithoutQueryString) + } + } + } + + return SanitizedError{errorMessage} +} + +// removeQueryString takes a URL string as input and returns the URL without the query string. +func removeQueryString(urlStr string) (string, error) { + // Parse the URL. + u, err := url.Parse(urlStr) + if err != nil { + return "", err + } + + // Rebuild the URL without the query string. + u.RawQuery = "" + return u.String(), nil +} + +// extractURLs takes a log message as input and returns the URLs found. +func extractURLs(logMessage string) []string { + // Define a regular expression to match a URL. + // This is a simple pattern and might need to be adjusted depending on the log message format. + urlRegex := regexp.MustCompile(`https?://[^\s]+`) + + // Find the first match in the log message. + matches := urlRegex.FindAllString(logMessage, -1) + if len(matches) == 0 { + return []string{} + } + + return matches +} diff --git a/internal/error/sanitized_test.go b/internal/error/sanitized_test.go new file mode 100644 index 000000000..e9c6a858b --- /dev/null +++ b/internal/error/sanitized_test.go @@ -0,0 +1,141 @@ +/* +Copyright 2024 The Flux authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package error + +import ( + "errors" + "testing" + + . "github.com/onsi/gomega" +) + +func Test_extractURLs(t *testing.T) { + + tests := []struct { + name string + logMessage string + wantUrls []string + }{ + { + name: "Log Contains single URL", + logMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02\": dial tcp 20.60.53.129:443: connect: connection refused", + wantUrls: []string{"https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02\":"}, + }, + { + name: "Log Contains multiple URL", + logMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?abc=es https://blobstorage1.blob.core.windows.net/container/index.yaml?abc=no : dial tcp 20.60.53.129:443: connect: connection refused", + wantUrls: []string{ + "https://blobstorage.blob.core.windows.net/container/index.yaml?abc=es", + "https://blobstorage1.blob.core.windows.net/container/index.yaml?abc=no", + }, + }, + { + name: "Log Contains No URL", + logMessage: "Log message without URL", + wantUrls: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + urls := extractURLs(tt.logMessage) + + g.Expect(len(urls)).To(Equal(len(tt.wantUrls))) + for i := range tt.wantUrls { + g.Expect(urls[i]).To(Equal(tt.wantUrls[i])) + } + }) + } +} + +func Test_removeQueryString(t *testing.T) { + + tests := []struct { + name string + urlStr string + wantUrl string + }{ + { + name: "URL with query string", + urlStr: "https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02", + wantUrl: "https://blobstorage.blob.core.windows.net/container/index.yaml", + }, + { + name: "URL without query string", + urlStr: "https://blobstorage.blob.core.windows.net/container/index.yaml", + wantUrl: "https://blobstorage.blob.core.windows.net/container/index.yaml", + }, + { + name: "URL with query string and port", + urlStr: "https://blobstorage.blob.core.windows.net:443/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02", + wantUrl: "https://blobstorage.blob.core.windows.net:443/container/index.yaml", + }, + { + name: "Invalid URL", + urlStr: "NoUrl", + wantUrl: "NoUrl", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + urlWithoutQueryString, err := removeQueryString(tt.urlStr) + + g.Expect(err).To(BeNil()) + g.Expect(urlWithoutQueryString).To(Equal(tt.wantUrl)) + }) + } +} + +func Test_SanitizeError(t *testing.T) { + + tests := []struct { + name string + errMessage string + wantErrMessage string + }{ + { + name: "Log message with URL with query string", + errMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02\": dial tcp 20.60.53.129:443: connect: connection refused", + wantErrMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml dial tcp 20.60.53.129:443: connect: connection refused", + }, + { + name: "Log message without URL", + errMessage: "Log message contains no URL", + wantErrMessage: "Log message contains no URL", + }, + + { + name: "Log message with multiple Urls", + errMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?abc=es https://blobstorage1.blob.core.windows.net/container/index.yaml?abc=no dial tcp 20.60.53.129:443: connect: connection refused", + wantErrMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml https://blobstorage1.blob.core.windows.net/container/index.yaml dial tcp 20.60.53.129:443: connect: connection refused", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + err := SanitizeError(errors.New(tt.errMessage)) + g.Expect(err.Error()).To(Equal(tt.wantErrMessage)) + }) + } +}