Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve AWS IAM unique IDs #2814

Merged
merged 5 commits into from
Jun 7, 2017
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 81 additions & 0 deletions builtin/credential/aws/backend.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package awsauth

import (
"fmt"
"sync"
"time"

"github.com/aws/aws-sdk-go/aws/endpoints"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/iam"
"github.com/hashicorp/vault/logical"
Expand Down Expand Up @@ -54,6 +56,15 @@ type backend struct {
// When the credentials are modified or deleted, all the cached client objects
// will be flushed. The empty STS role signifies the master account
IAMClientsMap map[string]map[string]*iam.IAM

// AWS Account ID of the "default" AWS credentials
// This cache avoids the need to call GetCallerIdentity repeatedly to learn it
// We can't store this because, in certain pathological cases, it could change
// out from under us, such as a standby and active Vault server in different AWS
// accounts using their IAM instance profile to get their credentials.
defaultAWSAccountID string

resolveArnToUniqueId func(logical.Storage, string) (string, error)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we rename this to resolveArnToUniqueIDFunc?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}

func Backend(conf *logical.BackendConfig) (*backend, error) {
Expand All @@ -65,6 +76,8 @@ func Backend(conf *logical.BackendConfig) (*backend, error) {
IAMClientsMap: make(map[string]map[string]*iam.IAM),
}

b.resolveArnToUniqueId = b.resolveArnToRealUniqueId

b.Backend = &framework.Backend{
PeriodicFunc: b.periodicFunc,
AuthRenew: b.pathLoginRenew,
Expand Down Expand Up @@ -171,7 +184,75 @@ func (b *backend) invalidate(key string) {
defer b.configMutex.Unlock()
b.flushCachedEC2Clients()
b.flushCachedIAMClients()
b.defaultAWSAccountID = ""
}
}

// Putting this here so we can inject a fake resolver into the backend for unit testing
// purposes
func (b *backend) resolveArnToRealUniqueId(s logical.Storage, arn string) (string, error) {
entity, err := parseIamArn(arn)
if err != nil {
return "", err
}
// This odd-looking code is here because IAM is an inherently global service. IAM and STS ARNs
// don't have regions in them, and there is only a single global endpoint for IAM; see
// http://docs.aws.amazon.com/general/latest/gr/rande.html#iam_region
// However, the ARNs do have a partition in them, because the GovCloud and China partitions DO
// have their own separate endpoints, and the partition is encoded in the ARN. If Amazon's Go SDK
// would allow us to pass a partition back to the IAM client, it would be much simpler. But it
// doesn't appear that's possible, so in order to properly support GovCloud and China, we do a
// circular dance of extracting the partition from the ARN, finding any arbitrary region in the
// partition, and passing that region back back to the SDK, so that the SDK can figure out the
// proper partition from the arbitrary region we passed in to look up the endpoint.
// Sigh
region := getAnyRegionForAwsPartition(entity.Partition)
if region == nil {
return "", fmt.Errorf("Unable to resolve partition %q to a region", entity.Partition)
}
iamClient, err := b.clientIAM(s, region.ID(), entity.AccountNumber)
if err != nil {
return "", err
}

switch entity.Type {
case "user":
userInfo, err := iamClient.GetUser(&iam.GetUserInput{UserName: &entity.FriendlyName})
if err != nil {
return "", err
}
return *userInfo.User.UserId, nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When err == nil, can we trust the responses from GetUser, GetRole and GetInstanceProfile to be non-nil. If not, can we add nil checks appropriately?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the current source, it's not possible. But, I don't see that as a guarantee documented anywhere, so better safe than sorry. I've added the additional checks to be safe.

case "role":
roleInfo, err := iamClient.GetRole(&iam.GetRoleInput{RoleName: &entity.FriendlyName})
if err != nil {
return "", err
}
return *roleInfo.Role.RoleId, nil
case "instance-profile":
profileInfo, err := iamClient.GetInstanceProfile(&iam.GetInstanceProfileInput{InstanceProfileName: &entity.FriendlyName})
if err != nil {
return "", err
}
return *profileInfo.InstanceProfile.InstanceProfileId, nil
default:
return "", fmt.Errorf("unrecognized error type %#v", entity.Type)
}
}

// Adapted from https://docs.aws.amazon.com/sdk-for-go/api/aws/endpoints/
// the "Enumerating Regions and Endpoint Metadata" section
func getAnyRegionForAwsPartition(partitionId string) *endpoints.Region {
resolver := endpoints.DefaultResolver()
partitions := resolver.(endpoints.EnumPartitions).Partitions()

for _, p := range partitions {
if p.ID() == partitionId {
for _, r := range p.Regions() {
return &r
}
}
}
return nil
}

const backendHelp = `
Expand Down
33 changes: 29 additions & 4 deletions builtin/credential/aws/backend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1346,7 +1346,7 @@ func TestBackendAcc_LoginWithCallerIdentity(t *testing.T) {
if err != nil {
t.Fatalf("Received error retrieving identity: %s", err)
}
testIdentityArn, _, _, err := parseIamArn(*testIdentity.Arn)
entity, err := parseIamArn(*testIdentity.Arn)
if err != nil {
t.Fatal(err)
}
Expand Down Expand Up @@ -1385,7 +1385,7 @@ func TestBackendAcc_LoginWithCallerIdentity(t *testing.T) {

// configuring the valid role we'll be able to login to
roleData := map[string]interface{}{
"bound_iam_principal_arn": testIdentityArn,
"bound_iam_principal_arn": entity.canonicalArn(),
"policies": "root",
"auth_type": iamAuthType,
}
Expand Down Expand Up @@ -1417,8 +1417,17 @@ func TestBackendAcc_LoginWithCallerIdentity(t *testing.T) {
t.Fatalf("bad: failed to create role; resp:%#v\nerr:%v", resp, err)
}

fakeArn := "arn:aws:iam::123456789012:role/FakeRole"
fakeArnResolver := func(s logical.Storage, arn string) (string, error) {
if arn == fakeArn {
return fmt.Sprintf("FakeUniqueIdFor%s", fakeArn), nil
}
return b.resolveArnToRealUniqueId(s, arn)
}
b.resolveArnToUniqueId = fakeArnResolver

// now we're creating the invalid role we won't be able to login to
roleData["bound_iam_principal_arn"] = "arn:aws:iam::123456789012:role/FakeRole"
roleData["bound_iam_principal_arn"] = fakeArn
roleRequest.Path = "role/" + testInvalidRoleName
resp, err = b.HandleRequest(roleRequest)
if err != nil || (resp != nil && resp.IsError()) {
Expand Down Expand Up @@ -1491,7 +1500,7 @@ func TestBackendAcc_LoginWithCallerIdentity(t *testing.T) {
t.Errorf("bad: expected failed login due to bad auth type: resp:%#v\nerr:%v", resp, err)
}

// finally, the happy path tests :)
// finally, the happy path test :)

loginData["role"] = testValidRoleName
resp, err = b.HandleRequest(loginRequest)
Expand All @@ -1501,4 +1510,20 @@ func TestBackendAcc_LoginWithCallerIdentity(t *testing.T) {
if resp == nil || resp.Auth == nil || resp.IsError() {
t.Errorf("bad: expected valid login: resp:%#v", resp)
}

// Now, fake out the unique ID resolver to ensure we fail login if the unique ID
// changes from under us
b.resolveArnToUniqueId = resolveArnToFakeUniqueId
// First, we need to update the role to force Vault to use our fake resolver to
// pick up the fake user ID
roleData["bound_iam_principal_arn"] = entity.canonicalArn()
roleRequest.Path = "role/" + testValidRoleName
resp, err = b.HandleRequest(roleRequest)
if err != nil || (resp != nil && resp.IsError()) {
t.Fatalf("bad: failed to recreate role: resp:%#v\nerr:%v", resp, err)
}
resp, err = b.HandleRequest(loginRequest)
if err != nil || resp == nil || !resp.IsError() {
t.Errorf("bad: expected failed login due to changed AWS role ID: resp: %#v\nerr:%v", resp, err)
}
}
66 changes: 51 additions & 15 deletions builtin/credential/aws/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/iam"
"github.com/aws/aws-sdk-go/service/sts"
"github.com/hashicorp/go-cleanhttp"
"github.com/hashicorp/vault/helper/awsutil"
"github.com/hashicorp/vault/logical"
Expand Down Expand Up @@ -70,7 +71,7 @@ func (b *backend) getRawClientConfig(s logical.Storage, region, clientType strin
// It uses getRawClientConfig to obtain config for the runtime environemnt, and if
// stsRole is a non-empty string, it will use AssumeRole to obtain a set of assumed
// credentials. The credentials will expire after 15 minutes but will auto-refresh.
func (b *backend) getClientConfig(s logical.Storage, region, stsRole, clientType string) (*aws.Config, error) {
func (b *backend) getClientConfig(s logical.Storage, region, stsRole, accountID, clientType string) (*aws.Config, error) {

config, err := b.getRawClientConfig(s, region, clientType)
if err != nil {
Expand All @@ -80,20 +81,36 @@ func (b *backend) getClientConfig(s logical.Storage, region, stsRole, clientType
return nil, fmt.Errorf("could not compile valid credentials through the default provider chain")
}

stsConfig, err := b.getRawClientConfig(s, region, "sts")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain this change? Will it always be a valid assumption to assume that you're getting STS credentials?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question (and I probably should have commented it).

Previously, Vault just assumed that if there wasn't an stsRole configured for the given account, to just use the default credentials provided. That worked because you were looking up two things:

  1. Instance IDs, which were basically guaranteed to be globally unique, so if you found the given instance ID, you knew it was the instance ID you were looking for.
  2. IAM instance profiles by name. These weren't globally unique across accounts, but it was OK because the only thing you looked for was the associated IAM role ARN, which does include the account number, so you were OK if you looked up the profile in the wrong account because it wouldn't have the associated IAM ARN you need.

In this PR, I need to resolve unique IDs, which requires looking up a particular IAM entity (user or role) in a given account with a given name. If I have an stsRole configured for the given account, then great, I'll just try to assume it. But, what if I don't have an stsRole defined for that account ID? I'm not a huge fan of the assumption that is being made that it should just look up using the default credentials -- I'm looking up the entity by name, but entities with the same name can exist in multiple accounts. I could check the returned ARN to ensure it has the expected account ID embedded in it, but this feels more correct to me. getClientConfig is being asked for an AWS API client in account 123, we should check to ensure that the client we're returning is actually in account 123. And that's the goal here.

Thinking through this a little more, there's a small issue here in that an operator could call auth/aws/config/sts with an account_id of 123456789012, but pass in an stsRole belonging to a different account, as the config/sts endpoint doesn't actually validate that the passed-in stsRole ARN belongs to account_id but I think that's probably a separate discussion (and I'll open a separate issue for it).

Will it always be a valid assumption to assume that you're getting STS credentials?

I'd put it a little differently. I believe it is a valid assumption that, if you can make any AWS API call, you are able to call sts:GetCallerIdentity. I've even tried an IAM policy to explicitly deny sts:GetCallerIdentity, and it still succeeds.

Does this all make sense?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My question was less about sts:GetCallerIdentity and more about whether it's safe to assume that you always want to use STS creds to make the client calls but from your explanation it sounds correct -- I assume that in any given account we should always be able to get STS creds? Or will this not be the case if they're not using IAM roles, in which case there needs to be a fallback?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how to answer your question because the phrase "STS creds" isn't well defined, so I'm not sure what you're asking and thus not sure how to answer. Let me try to explain a little more, and if I'm still not answering your question, it'd probably be best to discuss offline. Apologies if this response is a bit basic, but I'm just trying to understand where we're not connecting.

STS is both a service that requires credentials to authenticate to it and a service that can act as a source of credentials. The code in question does both. I'm pretty sure that any type of IAM entity can authenticate to STS.

it's safe to assume that you always want to use STS creds to make the client calls

Which client calls?

If an stsRole has been configured for a given target account ID, then we use the existing creds Vault has been configured with to authenticate to STS to ask for a new set of credentials corresponding to the stsRole, and it uses those credentials for future requests. I'm not really changing this code path at all -- I'm just pulling the existing code up a level so the results can be used, but no material changes are being made. And we're depending on customers to configure the IAM roles such that Vault is able to retrieve credentials via sts:AssumeRole, so the customer has said he/she wants to get credentials from STS.

If no stsRole has been configured for an account (the big change I'm making here), then we just use the existing creds Vault has been configured with to authenticate to sts:GetCallerIdentity to learn the identity of the creds being given; we're not asking for any new credentials from STS. And we continue to use the creds Vault has been given (i.e., not any temporary credentials retrieved via STS) to make future client calls.

it's safe to assume that you always want to use STS creds to make the client calls

Which client calls? The only times we ask STS for credentials is when clients have explicitly told Vault to go to STS for credentials for those calls, and I'm not changing that.

I assume that in any given account we should always be able to get STS creds?

In any given account, we should always be able to call sts:GetCallerIdentity using any valid creds, whether those are IAM user creds or assumed role creds. Beyond that, it depends. If users have set up the appropriate STS configs in Vault and have set up the appropriate IAM permissions in AWS, then yes. But if's really a user configuration issue.

Or will this not be the case if they're not using IAM roles

The stsRole must be an IAM role, yes. Otherwise, even if Vault is using, say, IAM User credentials, all this will still work. IAM users can call sts:GetCallerIdentity and IAM users can call sts:AssumeRole (as long as the appropriate IAM permissions have been set up).

Does this answer what you're asking? If not, what am I missing?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to go with "I'm out of my AWS depth here" and just trust you on this. Don't worry, it's not that you haven't answered my question, it's more that I'm not sure what my question is anymore, but your answer has convinced me that you know exactly what you're doing and I should leave it be :-D

if stsConfig == nil {
return nil, fmt.Errorf("could not configure STS client")
}
if err != nil {
return nil, err
}
if stsRole != "" {
assumeRoleConfig, err := b.getRawClientConfig(s, region, "sts")
if err != nil {
return nil, err
}
if assumeRoleConfig == nil {
return nil, fmt.Errorf("could not configure STS client")
}
assumedCredentials := stscreds.NewCredentials(session.New(assumeRoleConfig), stsRole)
assumedCredentials := stscreds.NewCredentials(session.New(stsConfig), stsRole)
// Test that we actually have permissions to assume the role
if _, err = assumedCredentials.Get(); err != nil {
return nil, err
}
config.Credentials = assumedCredentials
} else {
if b.defaultAWSAccountID == "" {
client := sts.New(session.New(stsConfig))
if client == nil {
return nil, fmt.Errorf("could not obtain sts client: %v", err)
}
inputParams := &sts.GetCallerIdentityInput{}
identity, err := client.GetCallerIdentity(inputParams)
if err != nil {
return nil, fmt.Errorf("unable to fetch current caller: %v", err)
}
b.defaultAWSAccountID = *identity.Account
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a nil check for identity here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

}
if b.defaultAWSAccountID != accountID {
return nil, fmt.Errorf("unable to fetch client for account ID %s -- default client is for account %s", accountID, b.defaultAWSAccountID)
}
}

return config, nil
Expand Down Expand Up @@ -121,8 +138,25 @@ func (b *backend) flushCachedIAMClients() {
}
}

func (b *backend) stsRoleForAccount(s logical.Storage, accountID string) (string, error) {
// Check if an STS configuration exists for the AWS account
sts, err := b.lockedAwsStsEntry(s, accountID)
if err != nil {
return "", fmt.Errorf("error fetching STS config for account ID %q: %q\n", accountID, err)
}
// An empty STS role signifies the master account
if sts != nil {
return sts.StsRole, nil
}
return "", nil
}

// clientEC2 creates a client to interact with AWS EC2 API
func (b *backend) clientEC2(s logical.Storage, region string, stsRole string) (*ec2.EC2, error) {
func (b *backend) clientEC2(s logical.Storage, region, accountID string) (*ec2.EC2, error) {
stsRole, err := b.stsRoleForAccount(s, accountID)
if err != nil {
return nil, err
}
b.configMutex.RLock()
if b.EC2ClientsMap[region] != nil && b.EC2ClientsMap[region][stsRole] != nil {
defer b.configMutex.RUnlock()
Expand All @@ -142,8 +176,7 @@ func (b *backend) clientEC2(s logical.Storage, region string, stsRole string) (*

// Create an AWS config object using a chain of providers
var awsConfig *aws.Config
var err error
awsConfig, err = b.getClientConfig(s, region, stsRole, "ec2")
awsConfig, err = b.getClientConfig(s, region, stsRole, accountID, "ec2")

if err != nil {
return nil, err
Expand All @@ -168,7 +201,11 @@ func (b *backend) clientEC2(s logical.Storage, region string, stsRole string) (*
}

// clientIAM creates a client to interact with AWS IAM API
func (b *backend) clientIAM(s logical.Storage, region string, stsRole string) (*iam.IAM, error) {
func (b *backend) clientIAM(s logical.Storage, region, accountID string) (*iam.IAM, error) {
stsRole, err := b.stsRoleForAccount(s, accountID)
if err != nil {
return nil, err
}
b.configMutex.RLock()
if b.IAMClientsMap[region] != nil && b.IAMClientsMap[region][stsRole] != nil {
defer b.configMutex.RUnlock()
Expand All @@ -188,8 +225,7 @@ func (b *backend) clientIAM(s logical.Storage, region string, stsRole string) (*

// Create an AWS config object using a chain of providers
var awsConfig *aws.Config
var err error
awsConfig, err = b.getClientConfig(s, region, stsRole, "iam")
awsConfig, err = b.getClientConfig(s, region, stsRole, accountID, "iam")

if err != nil {
return nil, err
Expand Down
4 changes: 4 additions & 0 deletions builtin/credential/aws/path_config_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ func (b *backend) pathConfigClientDelete(
// Remove all the cached EC2 client objects in the backend.
b.flushCachedIAMClients()

// unset the cached default AWS account ID
b.defaultAWSAccountID = ""

return nil, nil
}

Expand Down Expand Up @@ -234,6 +237,7 @@ func (b *backend) pathConfigClientCreateUpdate(
if changedCreds {
b.flushCachedEC2Clients()
b.flushCachedIAMClients()
b.defaultAWSAccountID = ""
}

return nil, nil
Expand Down
Loading