From b59c2e8fc901b0ae5092cba47a093b38bebc06b3 Mon Sep 17 00:00:00 2001 From: Silvano Ravotto Date: Thu, 21 Aug 2025 17:22:25 -0400 Subject: [PATCH] store: add initial S3 store implementation Introduce `internal/store/s3.go` with an S3-backed Store implementation: - Constructed via `S3FromEnv` using AWS-style environment variables. - Supports alternates (path-style, skip checksum, skip TLS) with a safe baseline first. - Probes connectivity by writing, reading, and deleting a `_blobcheck` object. --- go.mod | 19 +++ go.sum | 38 +++++ internal/env/env.go | 28 +++ internal/store/s3.go | 320 +++++++++++++++++++++++++++++++++++ internal/store/s3_test.go | 284 +++++++++++++++++++++++++++++++ internal/store/store.go | 26 +++ internal/store/store_test.go | 67 ++++++++ 7 files changed, 782 insertions(+) create mode 100644 internal/env/env.go create mode 100644 internal/store/s3.go create mode 100644 internal/store/s3_test.go create mode 100644 internal/store/store_test.go diff --git a/go.mod b/go.mod index 6125971..db08cff 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,21 @@ require ( require ( github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.18.6 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.4 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.4 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.4 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.4 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.28.2 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.2 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.38.0 // indirect + github.com/aws/smithy-go v1.22.5 // indirect github.com/bmatcuk/doublestar/v4 v4.0.2 // indirect github.com/cockroachdb/gostdlib v1.19.0 // indirect github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect @@ -37,8 +52,12 @@ require ( ) require ( + github.com/aws/aws-sdk-go-v2 v1.38.1 + github.com/aws/aws-sdk-go-v2/config v1.31.2 + github.com/aws/aws-sdk-go-v2/service/s3 v1.87.1 github.com/cockroachdb/errors v1.12.0 github.com/cockroachdb/field-eng-powertools v0.1.3 + github.com/google/uuid v1.6.0 github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect github.com/jackc/puddle/v2 v2.2.2 // indirect diff --git a/go.sum b/go.sum index a045484..fee8d6c 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,41 @@ github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c h1:pxW6RcqyfI9/kWtOwnv/G+AzdKuy2ZrqINhenH4HyNs= github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= +github.com/aws/aws-sdk-go-v2 v1.38.1 h1:j7sc33amE74Rz0M/PoCpsZQ6OunLqys/m5antM0J+Z8= +github.com/aws/aws-sdk-go-v2 v1.38.1/go.mod h1:9Q0OoGQoboYIAJyslFyF1f5K1Ryddop8gqMhWx/n4Wg= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0 h1:6GMWV6CNpA/6fbFHnoAjrv4+LGfyTqZz2LtCHnspgDg= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.0/go.mod h1:/mXlTIVG9jbxkqDnr5UQNQxW1HRYxeGklkM9vAFeabg= +github.com/aws/aws-sdk-go-v2/config v1.31.2 h1:NOaSZpVGEH2Np/c1toSeW0jooNl+9ALmsUTZ8YvkJR0= +github.com/aws/aws-sdk-go-v2/config v1.31.2/go.mod h1:17ft42Yb2lF6OigqSYiDAiUcX4RIkEMY6XxEMJsrAes= +github.com/aws/aws-sdk-go-v2/credentials v1.18.6 h1:AmmvNEYrru7sYNJnp3pf57lGbiarX4T9qU/6AZ9SucU= +github.com/aws/aws-sdk-go-v2/credentials v1.18.6/go.mod h1:/jdQkh1iVPa01xndfECInp1v1Wnp70v3K4MvtlLGVEc= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.4 h1:lpdMwTzmuDLkgW7086jE94HweHCqG+uOJwHf3LZs7T0= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.4/go.mod h1:9xzb8/SV62W6gHQGC/8rrvgNXU6ZoYM3sAIJCIrXJxY= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.4 h1:IdCLsiiIj5YJ3AFevsewURCPV+YWUlOW8JiPhoAy8vg= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.4/go.mod h1:l4bdfCD7XyyZA9BolKBo1eLqgaJxl0/x91PL4Yqe0ao= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.4 h1:j7vjtr1YIssWQOMeOWRbh3z8g2oY/xPjnZH2gLY4sGw= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.4/go.mod h1:yDmJgqOiH4EA8Hndnv4KwAo8jCGTSnM5ASG1nBI+toA= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.4 h1:BE/MNQ86yzTINrfxPPFS86QCBNQeLKY2A0KhDh47+wI= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.4/go.mod h1:SPBBhkJxjcrzJBc+qY85e83MQ2q3qdra8fghhkkyrJg= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0 h1:6+lZi2JeGKtCraAj1rpoZfKqnQ9SptseRZioejfUOLM= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.0/go.mod h1:eb3gfbVIxIoGgJsi9pGne19dhCBpK6opTYpQqAmdy44= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.4 h1:Beh9oVgtQnBgR4sKKzkUBRQpf1GnL4wt0l4s8h2VCJ0= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.8.4/go.mod h1:b17At0o8inygF+c6FOD3rNyYZufPw62o9XJbSfQPgbo= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.4 h1:ueB2Te0NacDMnaC+68za9jLwkjzxGWm0KB5HTUHjLTI= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.4/go.mod h1:nLEfLnVMmLvyIG58/6gsSA03F1voKGaCfHV7+lR8S7s= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.4 h1:HVSeukL40rHclNcUqVcBwE1YoZhOkoLeBfhUqR3tjIU= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.4/go.mod h1:DnbBOv4FlIXHj2/xmrUQYtawRFC9L9ZmQPz+DBc6X5I= +github.com/aws/aws-sdk-go-v2/service/s3 v1.87.1 h1:2n6Pd67eJwAb/5KCX62/8RTU0aFAAW7V5XIGSghiHrw= +github.com/aws/aws-sdk-go-v2/service/s3 v1.87.1/go.mod h1:w5PC+6GHLkvMJKasYGVloB3TduOtROEMqm15HSuIbw4= +github.com/aws/aws-sdk-go-v2/service/sso v1.28.2 h1:ve9dYBB8CfJGTFqcQ3ZLAAb/KXWgYlgu/2R2TZL2Ko0= +github.com/aws/aws-sdk-go-v2/service/sso v1.28.2/go.mod h1:n9bTZFZcBa9hGGqVz3i/a6+NG0zmZgtkB9qVVFDqPA8= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.2 h1:pd9G9HQaM6UZAZh19pYOkpKSQkyQQ9ftnl/LttQOcGI= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.33.2/go.mod h1:eknndR9rU8UpE/OmFpqU78V1EcXPKFTTm5l/buZYgvM= +github.com/aws/aws-sdk-go-v2/service/sts v1.38.0 h1:iV1Ko4Em/lkJIsoKyGfc0nQySi+v0Udxr6Igq+y9JZc= +github.com/aws/aws-sdk-go-v2/service/sts v1.38.0/go.mod h1:bEPcjW7IbolPfK67G1nilqWyoxYMSPrDiIQ3RdIdKgo= +github.com/aws/smithy-go v1.22.5 h1:P9ATCXPMb2mPjYBgueqJNCA5S9UfktsW0tTxi+a7eqw= +github.com/aws/smithy-go v1.22.5/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/bmatcuk/doublestar/v4 v4.0.2 h1:X0krlUVAVmtr2cRoTqR8aDMrDqnB36ht8wpWTiQ3jsA= github.com/bmatcuk/doublestar/v4 v4.0.2/go.mod h1:xBQ8jztBU6kakFMg+8WGxn0c6z1fTSPVIjEY1Wr7jzc= github.com/cockroachdb/crlfmt v0.3.0 h1:IaPIlidTHn7s493ozBcpnkF/HNzCNe7aBJmUMqgIlOk= @@ -28,6 +64,8 @@ github.com/google/addlicense v1.2.0 h1:W+DP4A639JGkcwBGMDvjSurZHvaq2FN0pP7se9czs github.com/google/addlicense v1.2.0/go.mod h1:Sm/DHu7Jk+T5miFHHehdIjbi4M5+dJDRS3Cq0rncIxA= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= diff --git a/internal/env/env.go b/internal/env/env.go new file mode 100644 index 0000000..b5380cd --- /dev/null +++ b/internal/env/env.go @@ -0,0 +1,28 @@ +// Copyright 2025 Cockroach Labs, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package env + +// LookupEnv is a function that retrieves the value of an environment variable. +type LookupEnv func(key string) (string, bool) + +// Env holds the environment configuration. +type Env struct { + DatabaseURL string // the database connection URL + Endpoint string // the S3 endpoint + Path string // the S3 bucket path + LookupEnv LookupEnv // allows injection of environment variable lookup for testing + Testing bool // enables testing mode + Verbose bool // enables verbose logging +} diff --git a/internal/store/s3.go b/internal/store/s3.go new file mode 100644 index 0000000..c2d4113 --- /dev/null +++ b/internal/store/s3.go @@ -0,0 +1,320 @@ +// Copyright 2025 Cockroach Labs, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package store + +import ( + "context" + "crypto/tls" + "fmt" + "io" + "iter" + "log/slog" + "maps" + "net/http" + "net/url" + "path" + "slices" + "strings" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/google/uuid" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/field-eng-powertools/stopper" + "github.com/cockroachlabs-field/blobcheck/internal/env" +) + +const ( + // AccountParam is the AWS access key ID. + AccountParam = "AWS_ACCESS_KEY_ID" + // SecretParam is the AWS secret access key. + SecretParam = "AWS_SECRET_ACCESS_KEY" + // TokenParam is the AWS session token. + TokenParam = "AWS_SESSION_TOKEN" + // EndPointParam is the AWS endpoint. + EndPointParam = "AWS_ENDPOINT" + // RegionParam is the AWS region. + RegionParam = "AWS_REGION" + // UsePathStyleParam is the AWS use path style. + UsePathStyleParam = "AWS_USE_PATH_STYLE" + // SkipChecksum is the AWS skip checksum. + SkipChecksum = "AWS_SKIP_CHECKSUM" + // SkipTLSVerify is the AWS skip TLS verify. + SkipTLSVerify = "AWS_SKIP_TLS_VERIFY" + + // DefaultRegion is the default AWS region. + DefaultRegion = "aws-global" +) + +// ValidParams lists the valid parameters for the S3 store. +var ValidParams = []string{ + AccountParam, SecretParam, TokenParam, EndPointParam, + RegionParam, UsePathStyleParam, SkipChecksum, SkipTLSVerify, +} + +var ( + // ObfuscatedParams lists the parameters that should be obfuscated. + ObfuscatedParams = []string{SecretParam, TokenParam} + // Obfuscated is the value used to obfuscate sensitive parameters. + Obfuscated = "******" +) + +// ErrMissingParam is returned when required parameters are missing. +var ErrMissingParam = errors.New("AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY must be set") + +type s3Store struct { + params Params + dest string + testing bool + verbose bool +} + +// S3FromEnv creates a new S3 store from the environment. +// It will try to connect to the S3 service using the environment variables provided, +// and adding any parameters that are required. +func S3FromEnv(ctx *stopper.Context, env *env.Env) (Store, error) { + creds, ok := lookupEnv(env, []string{AccountParam, SecretParam}, []string{TokenParam, RegionParam}) + if !ok { + return nil, ErrMissingParam + } + if env.Endpoint != "" { + creds[EndPointParam] = env.Endpoint + } + if _, ok := creds[RegionParam]; !ok { + creds[RegionParam] = DefaultRegion + } + initial := &s3Store{ + dest: path.Join(env.Path, uuid.NewString()), + params: creds, + testing: env.Testing, + } + return initial.try(ctx, initial.BucketName()) +} + +// BucketName implements Store. +func (s *s3Store) BucketName() string { + cleanedPath := path.Clean(s.dest) + components := strings.Split(cleanedPath, "/") + if len(components) == 0 { + return "" + } + return components[0] +} + +// Params implements Store. +func (s *s3Store) Params() Params { + params := maps.Clone(s.params) + for param := range params { + if slices.Contains(ObfuscatedParams, param) { + params[param] = Obfuscated + } + } + return params +} + +// URL implements Store. +func (s *s3Store) URL() string { + res := s.escapeValues() + res = fmt.Sprintf("s3://%s?%s", s.dest, res) + return res +} + +// addParam adds a parameter to the S3 store. +func (s *s3Store) addParam(key string, value string) error { + if slices.Contains(ValidParams, key) { + s.params[key] = value + return nil + } + return errors.Newf("invalid param %q", key) +} + +// candidateConfigs provides a set of candidate configurations for the S3 store. +// TODO(silvano): consider making this public. +func (s *s3Store) candidateConfigs() iter.Seq[Store] { + return func(yield func(Store) bool) { + combos := [][]string{ + {}, // baseline first + {SkipChecksum}, + {SkipTLSVerify}, + {UsePathStyleParam}, + {UsePathStyleParam, SkipChecksum}, + {UsePathStyleParam, SkipTLSVerify}, + {UsePathStyleParam, SkipTLSVerify, SkipChecksum}, + } + for _, combo := range combos { + alt := &s3Store{ + dest: s.dest, + params: maps.Clone(s.params), + } + for _, option := range combo { + alt.addParam(option, "true") + } + if !yield(alt) { + return + } + } + } +} + +// escapeValues provides a URL-encoded query string representation of the S3 store parameters. +func (s *s3Store) escapeValues() string { + var sb strings.Builder + first := true + for key, value := range s.params.Iter() { + if first { + first = false + } else { + sb.WriteString("&") + } + sb.WriteString(fmt.Sprintf("%s=%s", url.QueryEscape(key), url.QueryEscape(value))) + } + return sb.String() +} + +// lookupEnv retrieves required and optional environment variables from the provided environment. +func lookupEnv(env *env.Env, required []string, optional []string) (map[string]string, bool) { + res := make(map[string]string) + for _, v := range required { + val, ok := env.LookupEnv(v) + if !ok { + return nil, false + } + res[v] = val + } + // Add optional environment variables. + for _, v := range optional { + val, ok := env.LookupEnv(v) + if ok { + res[v] = val + } + } + return res, true +} + +const ( + objectKey = "_blobcheck" + content = "dummy_data" +) + +// try attempts to connect to the S3 store using alternative configurations. +func (s *s3Store) try(ctx context.Context, bucketName string) (Store, error) { + var clientMode aws.ClientLogMode + if s.verbose { + clientMode |= aws.LogRetries | aws.LogRequestWithBody | aws.LogRequestEventMessage | aws.LogResponse | aws.LogResponseEventMessage | aws.LogSigning + } + for alt := range s.candidateConfigs() { + params := alt.Params() + var loadOptions []func(options *config.LoadOptions) error + addLoadOption := func(option config.LoadOptionsFunc) { + loadOptions = append(loadOptions, option) + } + client := &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: params[SkipTLSVerify] == "true"}, + }, + } + addLoadOption(config.WithHTTPClient(client)) + if params[SkipTLSVerify] == "true" { + slog.Warn("TLS verification is disabled; use only for testing") + } + retryMaxAttempts := 1 + addLoadOption(config.WithRetryMaxAttempts(retryMaxAttempts)) + addLoadOption(config.WithClientLogMode(clientMode)) + // TODO (silvano) - consider removing testing guard + // LoadDefaultConfig will always honor env based provided credentials if present. + if s.testing { + addLoadOption(config.WithCredentialsProvider(aws.CredentialsProviderFunc(func(ctx context.Context) (aws.Credentials, error) { + return aws.Credentials{ + AccessKeyID: s.params[AccountParam], + SecretAccessKey: s.params[SecretParam], + SessionToken: s.params[TokenParam], + }, nil + }))) + } + config, err := config.LoadDefaultConfig(ctx, loadOptions...) + if err != nil { + return nil, err + } + + usePathStyle := params[UsePathStyleParam] == "true" + skipChecksum := params[SkipChecksum] == "true" + if skipChecksum { + config.RequestChecksumCalculation = aws.RequestChecksumCalculationWhenSupported + config.ResponseChecksumValidation = aws.ResponseChecksumValidationWhenSupported + } + s3Client := s3.NewFromConfig(config, func(o *s3.Options) { + if ep := params[EndPointParam]; ep != "" { + o.BaseEndpoint = aws.String(ep) + } + o.Region = params[RegionParam] + o.UsePathStyle = usePathStyle + }) + + slog.Debug("Trying params", slog.Any("env", alt.Params())) + + if _, err := s3Client.ListObjectsV2(ctx, &s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + }); err != nil { + slog.Debug("Failed to list objects", slog.Any("error", err), slog.Any("env", alt.Params())) + continue + } + // Build a probe key that includes the dest prefix (if any) + prefix := strings.TrimPrefix(s.dest, s.BucketName()) + prefix = strings.TrimPrefix(prefix, "/") + probeKey := objectKey + if prefix != "" { + probeKey = path.Join(prefix, objectKey) + } + // Try to write the object + input := &s3.PutObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(probeKey), + Body: strings.NewReader(content), // Use a reader for the content + } + if _, err := s3Client.PutObject(ctx, input); err != nil { + slog.Error("Failed to put object", slog.Any("error", err), slog.Any("env", alt.Params())) + continue + } + result, err := s3Client.GetObject(ctx, &s3.GetObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(probeKey), + }) + if err != nil { + // this shouldn't happen, since we just wrote the object + return nil, err + } + defer result.Body.Close() + got, err := io.ReadAll(result.Body) + if err != nil { + return nil, err + } + slog.Debug("Successfully read object", slog.String("content", string(got))) + if string(got) != content { + return nil, fmt.Errorf("unexpected content: got %q, want %q", got, content) + } + _, err = s3Client.DeleteObject(ctx, &s3.DeleteObjectInput{ + Bucket: aws.String(bucketName), + Key: aws.String(probeKey), + }) + if err != nil { + return nil, err + } + slog.Debug("Suggested params", slog.Any("env", alt.Params())) + return alt, nil + } + return nil, fmt.Errorf("unable to connect to storage provider %q", s.dest) +} diff --git a/internal/store/s3_test.go b/internal/store/s3_test.go new file mode 100644 index 0000000..496f881 --- /dev/null +++ b/internal/store/s3_test.go @@ -0,0 +1,284 @@ +// Copyright 2025 Cockroach Labs, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package store + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/cockroachdb/field-eng-powertools/stopper" + "github.com/cockroachlabs-field/blobcheck/internal/env" +) + +func TestS3Alternates(t *testing.T) { + + tests := []struct { + name string + params Params + dest string + want []Params + }{ + { + name: "basic params and dest", + params: Params{ + AccountParam: "AKIA...", + SecretParam: "SECRET...", + RegionParam: "us-east-1", + }, + dest: "bucket/key", + want: []Params{ + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-east-1"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-east-1", SkipChecksum: "true"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-east-1", SkipTLSVerify: "true"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-east-1", UsePathStyleParam: "true"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-east-1", UsePathStyleParam: "true", SkipChecksum: "true"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-east-1", UsePathStyleParam: "true", SkipTLSVerify: "true"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-east-1", UsePathStyleParam: "true", SkipTLSVerify: "true", SkipChecksum: "true"}, + }, + }, + { + name: "params with endpoint", + params: Params{ + AccountParam: "AKIA...", + SecretParam: "SECRET...", + RegionParam: "us-west-2", + EndPointParam: "https://s3.example.com", + }, + dest: "bucket2/key2", + want: []Params{ + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-west-2", EndPointParam: "https://s3.example.com"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-west-2", EndPointParam: "https://s3.example.com", SkipChecksum: "true"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-west-2", EndPointParam: "https://s3.example.com", SkipTLSVerify: "true"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-west-2", EndPointParam: "https://s3.example.com", UsePathStyleParam: "true"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-west-2", EndPointParam: "https://s3.example.com", UsePathStyleParam: "true", SkipChecksum: "true"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-west-2", EndPointParam: "https://s3.example.com", UsePathStyleParam: "true", SkipTLSVerify: "true"}, + {AccountParam: "AKIA...", SecretParam: "SECRET...", RegionParam: "us-west-2", EndPointParam: "https://s3.example.com", UsePathStyleParam: "true", SkipTLSVerify: "true", SkipChecksum: "true"}, + }, + }, + { + name: "only region param", + params: Params{ + RegionParam: "eu-central-1", + }, + dest: "bucket3/key3", + want: []Params{ + {RegionParam: "eu-central-1"}, + {RegionParam: "eu-central-1", SkipChecksum: "true"}, + {RegionParam: "eu-central-1", SkipTLSVerify: "true"}, + {RegionParam: "eu-central-1", UsePathStyleParam: "true"}, + {RegionParam: "eu-central-1", UsePathStyleParam: "true", SkipChecksum: "true"}, + {RegionParam: "eu-central-1", UsePathStyleParam: "true", SkipTLSVerify: "true"}, + {RegionParam: "eu-central-1", UsePathStyleParam: "true", SkipTLSVerify: "true", SkipChecksum: "true"}, + }, + }, + { + name: "empty params", + params: Params{}, + dest: "bucket4/key4", + want: []Params{ + {}, + {SkipChecksum: "true"}, + {SkipTLSVerify: "true"}, + {UsePathStyleParam: "true"}, + {UsePathStyleParam: "true", SkipChecksum: "true"}, + {UsePathStyleParam: "true", SkipTLSVerify: "true"}, + {UsePathStyleParam: "true", SkipTLSVerify: "true", SkipChecksum: "true"}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := &s3Store{ + params: tt.params, + dest: tt.dest, + } + gotSeq := s.candidateConfigs() + var got []Params + gotSeq(func(d Store) bool { + if alt, ok := d.(*s3Store); ok { + got = append(got, alt.params) + } + return true + }) + assert.Equal(t, len(tt.want), len(got), "s3.Alternates() count mismatch") + for i := range got { + assert.Equal(t, tt.want[i], got[i], "s3.Alternates()[%d] mismatch", i) + } + }) + } +} +func TestS3ParamsObfuscation(t *testing.T) { + tests := []struct { + name string + params Params + want Params + }{ + { + name: "obfuscate secret and token", + params: Params{ + AccountParam: "AKIA...", + SecretParam: "SECRET...", + TokenParam: "TOKEN...", + RegionParam: "us-east-1", + }, + want: Params{ + AccountParam: "AKIA...", + SecretParam: Obfuscated, + TokenParam: Obfuscated, + RegionParam: "us-east-1", + }, + }, + { + name: "no obfuscation needed", + params: Params{ + AccountParam: "AKIA...", + RegionParam: "us-west-2", + }, + want: Params{ + AccountParam: "AKIA...", + RegionParam: "us-west-2", + }, + }, + { + name: "only secret param", + params: Params{ + SecretParam: "SECRET...", + }, + want: Params{ + SecretParam: Obfuscated, + }, + }, + { + name: "empty params", + params: Params{}, + want: Params{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + s := &s3Store{ + params: tt.params, + dest: "bucket/key", + } + got := s.Params() + assert.Equal(t, tt.want, got) + }) + } +} + +const ( + endpoint = "http://localhost:29000" + account = "cockroach" + secret = "cockroach" + testPath = "test/minio" +) + +func TestMinioFromEnv(t *testing.T) { + tests := []struct { + name string + env map[string]string + endpoint string + want Params + wantErr error + }{ + { + name: "missing required env vars", + env: map[string]string{}, + endpoint: endpoint, + want: map[string]string{}, + wantErr: ErrMissingParam, + }, + { + name: "missing secret", + env: map[string]string{ + AccountParam: account, + }, + endpoint: endpoint, + want: Params{}, + wantErr: ErrMissingParam, + }, + { + name: "missing account", + env: map[string]string{ + SecretParam: secret, + }, + endpoint: endpoint, + want: Params{}, + wantErr: ErrMissingParam, + }, + { + name: "no region param", + env: map[string]string{ + AccountParam: account, + SecretParam: secret, + }, + endpoint: endpoint, + want: Params{ + AccountParam: account, + SecretParam: secret, + RegionParam: DefaultRegion, + EndPointParam: endpoint, + UsePathStyleParam: "true", + }, + }, + { + name: "region param", + env: map[string]string{ + AccountParam: account, + SecretParam: secret, + RegionParam: "us-east-1", + }, + endpoint: endpoint, + want: Params{ + AccountParam: account, + SecretParam: secret, + RegionParam: "us-east-1", + EndPointParam: endpoint, + UsePathStyleParam: "true", + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := stopper.WithContext(t.Context()) + lookup := func(key string) (string, bool) { + res, ok := tt.env[key] + return res, ok + } + env := &env.Env{ + Path: testPath, + Endpoint: tt.endpoint, + LookupEnv: lookup, + Testing: true, + } + + store, err := S3FromEnv(ctx, env) + if tt.wantErr != nil { + assert.Nil(t, store) + assert.ErrorIs(t, err, tt.wantErr) + return + } + require.NoError(t, err) + s3 := (store.(*s3Store)) + assert.Equal(t, tt.want, s3.params) + assert.Regexp(t, fmt.Sprintf("^%s", testPath), s3.dest) + }) + } +} diff --git a/internal/store/store.go b/internal/store/store.go index eb06dbd..bf4e062 100644 --- a/internal/store/store.go +++ b/internal/store/store.go @@ -12,11 +12,37 @@ // See the License for the specific language governing permissions and // limitations under the License. +// TODO(silvano): rename this package to "externalStorage"/"blobStorage" package store +import ( + "iter" + "slices" +) + // Params represents the parameters to be set for a destination to perform a backup/restore. type Params map[string]string +func (p Params) sortedKeys() []string { + keys := make([]string, 0, len(p)) + for k := range p { + keys = append(keys, k) + } + slices.Sort(keys) + return keys +} + +// Iter returns an iterator over the parameters sorted by key. +func (p Params) Iter() iter.Seq2[string, string] { + return func(yield func(string, string) bool) { + for _, k := range p.sortedKeys() { + if !yield(k, p[k]) { + return + } + } + } +} + // Store represents a destination to perform a backup/restore. type Store interface { // Params returns a copy of the params. diff --git a/internal/store/store_test.go b/internal/store/store_test.go new file mode 100644 index 0000000..025920d --- /dev/null +++ b/internal/store/store_test.go @@ -0,0 +1,67 @@ +// Copyright 2025 Cockroach Labs, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package store + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestParamsIter verifies that Params.Iter yields keys and values in sorted order. +func TestParamsIter(t *testing.T) { + p := Params{ + "zeta": "last", + "alpha": "first", + "middle": "second", + "beta": "third", + "delta": "fourth", + "epsilon": "fifth", + "gamma": "sixth", + "Aardvark": "seventh", // capital letter to test ASCII ordering + } + + var gotKeys []string + var gotVals []string + for k, v := range p.Iter() { + gotKeys = append(gotKeys, k) + gotVals = append(gotVals, v) + } + + // Keys should be sorted in standard Go string order (ASCII/UTF-8 ordering). + wantKeys := []string{ + "Aardvark", + "alpha", + "beta", + "delta", + "epsilon", + "gamma", + "middle", + "zeta", + } + wantVals := []string{ + "seventh", + "first", + "third", + "fourth", + "fifth", + "sixth", + "second", + "last", + } + a := assert.New(t) + a.Equal(gotKeys, wantKeys) + a.Equal(gotVals, wantVals) +}