Skip to content

Commit

Permalink
s3: the capability to set custom s3 endpoint
Browse files Browse the repository at this point in the history
* proposed at #94
* global S3_ENDPOINT
* bucket configuration
* docs

Signed-off-by: Alex Aizman <alex.aizman@gmail.com>
  • Loading branch information
alex-aizman committed Jun 16, 2022
1 parent 7942a27 commit e8a5cfa
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 19 deletions.
53 changes: 38 additions & 15 deletions ais/backend/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"fmt"
"io"
"net/http"
"os"
"strconv"
"strings"
"sync"
Expand All @@ -35,6 +36,11 @@ import (
// Constants of the AWS backend: metadata keys that carry the AIS checksum,
// and the name of the environment variable that overrides the S3 endpoint.
const (
// keys for the AIS checksum type and value, respectively
// (the "x-amz-meta-" prefix suggests S3 user-defined object metadata - TODO confirm)
awsChecksumType = "x-amz-meta-ais-cksum-type"
awsChecksumVal = "x-amz-meta-ais-cksum-val"

// environment variable to globally override the default 'https://s3.amazonaws.com' endpoint
// NOTE: the same can be done on a per-bucket basis, via bucket prop `Extra.AWS.Endpoint`
// (and of course, bucket override will take precedence)
// The variable is read via os.Getenv when NewAWS is called.
awsEnvS3Endpoint = "S3_ENDPOINT"
)

type (
Expand All @@ -48,15 +54,17 @@ type (
)

var (
clients map[string]*s3.S3 // one client per AWS region
cmu sync.RWMutex
clients map[string]map[string]*s3.S3 // one client per (region, endpoint)
cmu sync.RWMutex
s3Endpoint string
)

// interface guard
var _ cluster.BackendProvider = (*awsProvider)(nil)

func NewAWS(t cluster.Target) (cluster.BackendProvider, error) {
clients = make(map[string]*s3.S3, 2)
clients = make(map[string]map[string]*s3.S3, 2)
s3Endpoint = os.Getenv(awsEnvS3Endpoint)
return &awsProvider{t: t}, nil
}

Expand Down Expand Up @@ -111,7 +119,8 @@ func (*awsProvider) HeadBucket(_ ctx, bck *cluster.Bck) (bckProps cos.SimpleKVs,
}
bckProps = make(cos.SimpleKVs, 4)
bckProps[apc.HdrBackendProvider] = apc.ProviderAmazon
bckProps[apc.HdrCloudRegion] = region
bckProps[apc.HdrS3Region] = region
bckProps[apc.HdrS3Endpoint] = ""
bckProps[apc.HdrBucketVerEnabled] = strconv.FormatBool(
result.Status != nil && *result.Status == s3.BucketVersioningStatusEnabled,
)
Expand Down Expand Up @@ -459,29 +468,37 @@ func (*awsProvider) DeleteObj(lom *cluster.LOM) (errCode int, err error) {
// static helpers
//

// newClient creates new S3 client that can be used to make requests. It is
// guaranteed that the client is initialized even in case of errors.
// newClient creates new S3 client on a per-region basis or, more precisely,
// per (region, endpoint) pair - and note that s3 endpoint is per-bucket configurable.
// If the client already exists, newClient simply returns it.
//
// Quoting S3 SDK:
// From S3 SDK:
// "S3 methods are safe to use concurrently. It is not safe to
// modify mutate any of the struct's properties though."
func newClient(conf sessConf, tag string) (svc *s3.S3, region string, err error) {
endpoint := s3Endpoint
region = conf.region
if region == "" && conf.bck != nil && conf.bck.Props != nil {
region = conf.bck.Props.Extra.AWS.CloudRegion
if conf.bck != nil && conf.bck.Props != nil {
if region == "" {
region = conf.bck.Props.Extra.AWS.CloudRegion
}
if conf.bck.Props.Extra.AWS.Endpoint != "" {
endpoint = conf.bck.Props.Extra.AWS.Endpoint
}
}

// reuse
if region != "" {
cmu.RLock()
svc = clients[region]
svc = clients[region][endpoint]
cmu.RUnlock()
if svc != nil {
return
}
}
// create
var (
sess = _session()
sess = _session(endpoint)
awsConf = &aws.Config{}
)
if region == "" {
Expand All @@ -497,17 +514,23 @@ func newClient(conf sessConf, tag string) (svc *s3.S3, region string, err error)
debug.Assertf(region == *svc.Config.Region, "%s != %s", region, *svc.Config.Region)

cmu.Lock()
clients[region] = svc
eps := clients[region]
if eps == nil {
eps = make(map[string]*s3.S3, 1)
clients[region] = eps
}
eps[endpoint] = svc
cmu.Unlock()
return
}

// Create session using default creds from ~/.aws/credentials and environment variables.
func _session() *session.Session {
// TODO: avoid creating sessions for each request
func _session(endpoint string) *session.Session {
config := aws.Config{HTTPClient: cmn.NewClient(cmn.TransportArgs{})}
config.WithEndpoint(endpoint) // normally empty but could also be `Props.Extra.AWS.Endpoint` or `os.Getenv(awsEnvS3Endpoint)`
return session.Must(session.NewSessionWithOptions(session.Options{
SharedConfigState: session.SharedConfigEnable,
Config: aws.Config{HTTPClient: cmn.NewClient(cmn.TransportArgs{})},
Config: config,
}))
}

Expand Down
3 changes: 2 additions & 1 deletion ais/bucketmeta.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,8 @@ func mergeRemoteBckProps(props *cmn.BucketProps, header http.Header) *cmn.Bucket
debug.Assert(len(header) > 0)
switch props.Provider {
case apc.ProviderAmazon:
props.Extra.AWS.CloudRegion = header.Get(apc.HdrCloudRegion)
props.Extra.AWS.CloudRegion = header.Get(apc.HdrS3Region)
props.Extra.AWS.Endpoint = header.Get(apc.HdrS3Endpoint)
case apc.ProviderHTTP:
props.Extra.HTTP.OrigURLBck = header.Get(apc.HdrOrigURLBck)
}
Expand Down
9 changes: 6 additions & 3 deletions api/apc/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,14 @@ const (

// Bucket props headers.
HdrBucketProps = HeaderPrefix + "bucket-props"
HdrOrigURLBck = HeaderPrefix + "original-url" // See: BucketProps.Extra.HTTP.OrigURLBck
HdrCloudRegion = HeaderPrefix + "cloud-region" // See: BucketProps.Extra.AWS.CloudRegion
HdrBucketVerEnabled = HeaderPrefix + "versioning-enabled" // Enable/disable object versioning in a bucket.
HdrBucketCreated = HeaderPrefix + "created" // Bucket creation time.
HdrBackendProvider = HeaderPrefix + "provider" // ProviderAmazon et al. - see cmn/bucket.go.
HdrBackendProvider = HeaderPrefix + "provider" // ProviderAmazon et al. - see cmn/bck.go.
// including BucketProps.Extra.AWS
HdrS3Region = HeaderPrefix + "cloud_region"
HdrS3Endpoint = HeaderPrefix + "endpoint"
// including BucketProps.Extra.HTTP
HdrOrigURLBck = HeaderPrefix + "original-url"

HdrRemoteOffline = HeaderPrefix + "remote-offline" // When accessing cached remote bucket with no backend connectivity.

Expand Down
6 changes: 6 additions & 0 deletions cmn/api_bprops.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,15 @@ type (

// ExtraPropsAWS groups the AWS-specific bucket properties (referenced as `Extra.AWS`).
ExtraPropsAWS struct {
// region of the remote S3 bucket; tagged readonly
CloudRegion string `json:"cloud_region,omitempty" list:"readonly"`

// from https://github.com/aws/aws-sdk-go/blob/main/aws/config.go:
// "An optional endpoint URL (hostname only or fully qualified URI)
// that overrides the default generated endpoint."
// Empty by default; omitted from JSON when empty.
Endpoint string `json:"endpoint,omitempty"`
}
// ExtraPropsAWSToUpdate mirrors ExtraPropsAWS with pointer fields
// (presumably a nil pointer means "leave this property unchanged" - TODO confirm
// against the code that applies bucket-props updates).
ExtraPropsAWSToUpdate struct {
CloudRegion *string `json:"cloud_region"`
Endpoint *string `json:"endpoint"`
}

ExtraPropsHTTP struct {
Expand Down
43 changes: 43 additions & 0 deletions docs/cli/bucket.md
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,49 @@ Bucket props successfully updated
"access" set to:"GET,HEAD-OBJECT,HEAD-BUCKET,LIST-OBJECTS" (was:"<PREV_ACCESS_LIST>")
```

#### Configure custom AWS S3 endpoint

When a bucket is hosted by an S3-compliant backend (e.g., minio), we may want to specify an alternative S3 endpoint,
so that AIS nodes use it when reading, writing, listing, and, generally, performing all operations on remote S3 bucket(s).

Globally, the S3 endpoint can be overridden for _all_ S3 buckets via the "S3_ENDPOINT" environment variable.
If you decide to make the change, you may need to restart the AIS cluster, making sure that "S3_ENDPOINT" is available to the AIS nodes
when they are starting up.

But it can also be done - and will take precedence over the global setting - on a per-bucket basis.

Here are some examples:

```console
# Let's say, s3://abc contains a single object:
$ ais ls s3://abc
NAME SIZE
README.md 8.96KiB

# First, override the (empty) endpoint property in the bucket's configuration.
# Use the default AWS S3 endpoint `https://s3.amazonaws.com` (to see that it *applies* and works).
$ ais bucket props set s3://abc extra.aws.endpoint=s3.amazonaws.com
Bucket "aws://abc": property "extra.aws.endpoint=s3.amazonaws.com", nothing to do
$ ais ls s3://abc
NAME SIZE
README.md 8.96KiB

# Second, set it to invalid value, and observe that the bucket becomes inaccessible:
$ ais bucket props set s3://abc extra.aws.endpoint=foo
Bucket props successfully updated
"extra.aws.endpoint" set to: "foo" (was: "s3.amazonaws.com")
$ ais ls s3://abc
RequestError: send request failed: dial tcp: lookup abc.foo: no such host

# Finally, revert the endpoint back to empty and make sure the bucket is visible again:
$ ais bucket props set s3://abc extra.aws.endpoint=""
Bucket props successfully updated
"extra.aws.endpoint" set to: "" (was: "foo")
$ ais ls s3://abc
NAME SIZE
README.md 8.96KiB
```

#### Connect/Disconnect AIS bucket to/from cloud bucket

Set backend bucket for AIS bucket `bucket_name` to the GCP cloud bucket `cloud_bucket`.
Expand Down
13 changes: 13 additions & 0 deletions docs/s3compat.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@ redirect_from:
- /docs/s3compat.md/
---


AIS supports Amazon S3 in two distinct ways:

1. On the back, via the [backend](providers.md) abstraction. Specifically for S3, the corresponding [backend](providers.md) implementation currently utilizes [AWS SDK for Go](https://aws.amazon.com/sdk-for-go);
2. On the front, AIS provides an S3-compatible API, so that existing S3 applications can use AIStore out of the box, without the need to change their (existing) code.

This document talks about the latter - about Amazon S3 API compatibility (or simply, S3 compatibility).

For more references and background, see:

* [High-level AIS block diagram](overview.md#at-a-glance) that may help clarify the terms "back" and "front" used above.
* [Setting custom S3 endpoint](/docs/cli/bucket.md) - can be used e.g., when a bucket is hosted by an S3 compliant backend such as minio.

## Table of Contents

- [S3 Compatibility](#s3-compatibility)
Expand Down

0 comments on commit e8a5cfa

Please sign in to comment.