-
Notifications
You must be signed in to change notification settings - Fork 360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
724 bi collection improvements #770
Changes from 10 commits
5293ee8
93274e3
af13f47
3a953aa
4af09ee
0a8fd50
47f247a
18eaf4e
69b91c6
6e2b792
eb0c765
bf1867c
eb2db6c
f77f79f
9710b61
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package aws | ||
|
||
import ( | ||
"github.com/aws/aws-sdk-go/aws" | ||
"github.com/aws/aws-sdk-go/aws/session" | ||
"github.com/aws/aws-sdk-go/service/s3" | ||
"github.com/aws/aws-sdk-go/service/sts" | ||
"github.com/treeverse/lakefs/logging" | ||
) | ||
|
||
type MetadataProvider struct { | ||
logger logging.Logger | ||
awsConfig *aws.Config | ||
} | ||
|
||
func NewMetadataProvider(logger logging.Logger, awsConfig *aws.Config) *MetadataProvider { | ||
return &MetadataProvider{logger: logger, awsConfig: awsConfig} | ||
} | ||
|
||
func (m *MetadataProvider) GetMetadata() map[string]string { | ||
sess, err := session.NewSession(m.awsConfig) | ||
if err != nil { | ||
m.logger.Errorf("%v: failed to create AWS session for BI", err) | ||
return nil | ||
} | ||
sess.ClientConfig(s3.ServiceName) | ||
stsClient := sts.New(sess) | ||
identity, err := stsClient.GetCallerIdentity(&sts.GetCallerIdentityInput{}) | ||
if err != nil { | ||
m.logger.Errorf("%v: failed to get AWS account ID for BI", err) | ||
return nil | ||
} | ||
return map[string]string{"aws_account_id": *identity.Account} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
package s3 | ||
package s3_inventory | ||
|
||
import ( | ||
"context" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
package s3 | ||
package s3_inventory | ||
|
||
import ( | ||
"context" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
package s3 | ||
package s3_inventory | ||
|
||
import "github.com/xitongsys/parquet-go/reader" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
package s3 | ||
package s3_inventory | ||
|
||
import ( | ||
"context" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
package s3 | ||
package s3_inventory | ||
|
||
import ( | ||
"context" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
package gcp | ||
|
||
import ( | ||
"cloud.google.com/go/compute/metadata" | ||
"github.com/treeverse/lakefs/logging" | ||
) | ||
|
||
type MetadataProvider struct { | ||
logger logging.Logger | ||
} | ||
|
||
func NewMetadataProvider(logger logging.Logger) *MetadataProvider { | ||
return &MetadataProvider{logger: logger} | ||
} | ||
|
||
func (m *MetadataProvider) GetMetadata() map[string]string { | ||
projectID, err := metadata.NumericProjectID() | ||
if err != nil { | ||
m.logger.Errorf("%v: failed to get Google numeric project ID from instance metadata", err) | ||
return nil | ||
} | ||
return map[string]string{"google_numeric_project_id": projectID} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. another thing that's missing (sorry for not catching this earlier) is one way hashing: we never want to send the actual account id, just to know that different installations come from the same one. I suggest hashing the received id here and in the aws provider as well. in terms of metadata fields, i would seperate it into 2 fields: account_type ("aws", "gcp", etc) and "account_id" which is the hash. would make it easier downstream to consume. |
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package cloud | ||
|
||
import ( | ||
"github.com/treeverse/lakefs/block/gs" | ||
s3a "github.com/treeverse/lakefs/block/s3" | ||
"github.com/treeverse/lakefs/cloud/aws" | ||
"github.com/treeverse/lakefs/cloud/gcp" | ||
"github.com/treeverse/lakefs/config" | ||
"github.com/treeverse/lakefs/logging" | ||
) | ||
|
||
// MetadataProvider is implemented by cloud-specific providers that can report
// installation metadata as string key/value pairs.
type MetadataProvider interface {
	// GetMetadata returns the collected metadata. The providers built by
	// BuildMetadataProvider return nil when collection fails.
	GetMetadata() map[string]string
}
|
||
func BuildMetadataProvider(logger logging.Logger, c *config.Config) MetadataProvider { | ||
switch c.GetBlockstoreType() { | ||
case gs.BlockstoreType: | ||
return gcp.NewMetadataProvider(logger) | ||
case s3a.BlockstoreType: | ||
return aws.NewMetadataProvider(logger, c.GetAwsConfig()) | ||
default: | ||
return nil | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should be a warning - it doesn't affect the health or correctness of lakeFS in any way, nor do we ever request explicit permissions to do this operation. (The same is true for the AWS metadata provider.)