From 2467fe5cc45f5fffcc3b01b0928875e426f633cc Mon Sep 17 00:00:00 2001 From: Rushikesh Butley Date: Thu, 18 Apr 2024 12:50:43 -0700 Subject: [PATCH] Organization List Feature in Server AWS Node Attester Plugin "aws_iid" (#4838) * Add New Organization Feature Signed-off-by: Rushikesh Butley --- conf/server/server_full.conf | 23 ++ doc/plugin_server_nodeattestor_aws_iid.md | 55 +++- go.mod | 5 +- go.sum | 6 +- .../plugin/nodeattestor/awsiid/client.go | 27 +- pkg/server/plugin/nodeattestor/awsiid/iid.go | 90 ++++++- .../plugin/nodeattestor/awsiid/iid_test.go | 124 ++++++++- .../nodeattestor/awsiid/organization.go | 254 ++++++++++++++++++ .../nodeattestor/awsiid/organization_test.go | 140 ++++++++++ 9 files changed, 707 insertions(+), 17 deletions(-) create mode 100644 pkg/server/plugin/nodeattestor/awsiid/organization.go create mode 100644 pkg/server/plugin/nodeattestor/awsiid/organization_test.go diff --git a/conf/server/server_full.conf b/conf/server/server_full.conf index 769db77c16..4d21baafce 100644 --- a/conf/server/server_full.conf +++ b/conf/server/server_full.conf @@ -382,6 +382,29 @@ plugins { # # make sure that the underlying root volume has not been detached # # prior to attestation. Default: false # # skip_block_device = false + + # # verify_organization: Verify if the attesting node's account ID is part of the configured AWS Organization. + # # Make sure that the IAM role formed from the configuration below (e.g., "arn:aws:iam::management_account_id:role/assume_org_role") + # # can be assumed by the spire-server. + # verify_organization { + # # management_account_id: Management account ID, also known as the root account, + # # value will be the respective organization's management/root account ID. It's a required parameter. + # # management_account_id = "" + + # # management_account_region: Management account region, specifies the region + # # in which the management account is hosted. It's an optional parameter. + # # Default: us-west-2 + # # management_account_region = "" + + # # assume_org_role: Assume org role, specifies the role name present in the management + # # account. It's a required parameter. + # # assume_org_role = "" + + # # org_account_map_ttl: Org account map TTL, specifies the interval for retrieving the list of accounts present in the Organization. + # # It's an optional parameter. If specified, it should be greater than or equal to the duration of 1m (minute). + # # Default: 3m. + # # org_account_map_ttl = "" + # } # } # } diff --git a/doc/plugin_server_nodeattestor_aws_iid.md b/doc/plugin_server_nodeattestor_aws_iid.md index 98309d210b..68babbcf96 100644 --- a/doc/plugin_server_nodeattestor_aws_iid.md +++ b/doc/plugin_server_nodeattestor_aws_iid.md @@ -19,8 +19,9 @@ this plugin resolves the agent's AWS IID-based SPIFFE ID into a set of selectors | `disable_instance_profile_selectors` | Disables retrieving the attesting instance profile information that is used in the selectors. Useful in cases where the server cannot reach iam.amazonaws.com | false | | `assume_role` | The role to assume | Empty string, Optional parameter. | | `partition` | The AWS partition SPIRE server is running in <aws|aws-cn|aws-us-gov> | aws | +| `verify_organization` | Verify that nodes belong to a specified AWS Organization [see below](#enabling-aws-node-attestation-organization-validation) | | -A sample configuration: +Sample configuration: ```hcl NodeAttestor "aws_iid" { @@ -45,6 +46,58 @@ In the following configuration, assuming AWS IID document sent from the spire agent contains `accountId : 12345678`, the spire server will assume "arn:aws:iam::12345678:role/spire-server-delegate" role before making any AWS call for the node attestation. If `assume_role` is configured, the spire server will always assume the role even if the both the spire-server and the spire agent is deployed in the same account. +## Enabling AWS Node Attestation Organization Validation + +For configuring AWS Node attestation method with organization validation following configuration can be used: + +| Field Name | Description | Constraints | +|----------------------------|-----------------------------------------------------------------------------------------------|--------------------------------------------| +| management_account_id | Account id of the organzation | required | +| management_account_region | Region of management account id | optional | +| assume_org_role | IAM Role name, with capablities to list accounts | required | +| org_account_map_ttl | Cache the list of accounts for particular time. Should be >= 1 minute. Defaults to 3 minute. | optional | + +Using the block `verify_organization` the org validation node attestation method will be enabled. With above configuration spire server will form and try to assume the role as: `arn:aws:iam::management_account_id:role/assume_org_role`. When not used, block ex. `verify_organization = {}` should not be empty, it should be completely removed as its optional or should have all required parameters namely `management_account_id`, `assume_org_role`. + +The role under: `assume_role` must be created in the management account: `management_account_id`, and it should have a trust relationship with the role assumed by spire server. Below is a sample policy depicting the permissions required along with the trust relationship that needs to be created in management account. + +Policy : + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Action": "organizations:ListAccounts", + "Effect": "Allow", + "Resource": "*", + "Sid": "SpireOrganizationListAccountRole" + } + ] +} +``` + +Trust Relationship + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "CrossAccountAssumeRolePolicy", + "Effect": "Allow", + "Principal": { + "AWS": [ + "arn:aws:iam::account-id-where-spire-is-running:role/spire-control-plane-root-server", + "arn:aws:iam::account-id-where-spire-is-running:role/spire-control-plane-regional-server" + ] + }, + "Action": "sts:AssumeRole" + } + ] +} +``` + ## Disabling Instance Profile Selectors In cases where spire-server is running in a location with no public internet access available, setting `disable_instance_profile_selectors = true` will prevent the server from making requests to `iam.amazonaws.com`. This is needed as spire-server will fail to attest nodes as it cannot retrieve the metadata information. diff --git a/go.mod b/go.mod index ab88f04747..c2a92bedd2 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/spiffe/spire -go 1.22 +go 1.22.2 require ( cloud.google.com/go/iam v1.1.7 @@ -26,6 +26,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/ec2 v1.157.0 github.com/aws/aws-sdk-go-v2/service/iam v1.32.0 github.com/aws/aws-sdk-go-v2/service/kms v1.31.0 + github.com/aws/aws-sdk-go-v2/service/organizations v1.27.3 github.com/aws/aws-sdk-go-v2/service/s3 v1.53.0 github.com/aws/aws-sdk-go-v2/service/secretsmanager v1.28.1 github.com/aws/aws-sdk-go-v2/service/sts v1.28.5 @@ -306,7 +307,7 @@ require ( github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/numcpus v0.6.1 // indirect github.com/transparency-dev/merkle v0.0.2 // indirect - github.com/twmb/murmur3 v1.1.6 // indirect + github.com/twmb/murmur3 v1.1.8 // indirect github.com/vbatts/tar-split v0.11.5 // indirect github.com/xanzy/go-gitlab v0.102.0 // indirect github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect diff --git a/go.sum b/go.sum index 82395eb23a..39ff4e039b 100644 --- a/go.sum +++ b/go.sum @@ -601,6 +601,8 @@ github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.4 h1:uDj2K47EM1reAY github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.17.4/go.mod h1:XKCODf4RKHppc96c2EZBGV/oCUC7OClxAo2MEyg4pIk= github.com/aws/aws-sdk-go-v2/service/kms v1.31.0 h1:yl7wcqbisxPzknJVfWTLnK83McUvXba+pz2+tPbIUmQ= github.com/aws/aws-sdk-go-v2/service/kms v1.31.0/go.mod h1:2snWQJQUKsbN66vAawJuOGX7dr37pfOq9hb0tZDGIqQ= +github.com/aws/aws-sdk-go-v2/service/organizations v1.27.3 h1:CnPWlONzFX9/yO6IGuKg9sWUE8WhKztYRFbhmOHXjJI= +github.com/aws/aws-sdk-go-v2/service/organizations v1.27.3/go.mod h1:hUHSXe9HFEmLfHrXndAX5e69rv0nBsg22VuNQYl0JLM= github.com/aws/aws-sdk-go-v2/service/s3 v1.53.0 h1:r3o2YsgW9zRcIP3Q0WCmttFVhTuugeKIvT5z9xDspc0= github.com/aws/aws-sdk-go-v2/service/s3 v1.53.0/go.mod h1:w2E4f8PUfNtyjfL6Iu+mWI96FGttE03z3UdNcUEC4tA= github.com/aws/aws-sdk-go-v2/service/secretsmanager v1.28.1 h1:DtKw4TxZT3VrzYupXQJPBqT9ImyobZZE+JIQPPAVxqs= @@ -1455,8 +1457,8 @@ github.com/transparency-dev/merkle v0.0.2 h1:Q9nBoQcZcgPamMkGn7ghV8XiTZ/kRxn1yCG github.com/transparency-dev/merkle v0.0.2/go.mod h1:pqSy+OXefQ1EDUVmAJ8MUhHB9TXGuzVAT58PqBoHz1A= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= github.com/twmb/murmur3 v1.1.5/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= -github.com/twmb/murmur3 v1.1.6 h1:mqrRot1BRxm+Yct+vavLMou2/iJt0tNVTTC0QoIjaZg= -github.com/twmb/murmur3 v1.1.6/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= +github.com/twmb/murmur3 v1.1.8 h1:8Yt9taO/WN3l08xErzjeschgZU2QSrwm1kclYq+0aRg= +github.com/twmb/murmur3 v1.1.8/go.mod h1:Qq/R7NUyOfr65zD+6Q5IHKsJLwP7exErjN6lyyq3OSQ= github.com/uber-go/tally/v4 v4.1.16 h1:by2hveWRh/cUReButk6ns1sHK/hiKry7BuOV6iY16XI= github.com/uber-go/tally/v4 v4.1.16/go.mod h1:RW5DgqsyEPs0lA4b0YNf4zKj7DveKHd73hnO6zVlyW0= github.com/valyala/fastjson v1.6.4 h1:uAUNq9Z6ymTgGhcm0UynUAB6tlbakBrz6CQFax3BXVQ= diff --git a/pkg/server/plugin/nodeattestor/awsiid/client.go b/pkg/server/plugin/nodeattestor/awsiid/client.go index 7935e4a0c8..8325203c22 100644 --- a/pkg/server/plugin/nodeattestor/awsiid/client.go +++ b/pkg/server/plugin/nodeattestor/awsiid/client.go @@ -7,6 +7,7 @@ import ( "github.com/aws/aws-sdk-go-v2/service/ec2" "github.com/aws/aws-sdk-go-v2/service/iam" + "github.com/aws/aws-sdk-go-v2/service/organizations" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" ) @@ -18,11 +19,13 @@ var ( type Client interface { ec2.DescribeInstancesAPIClient iam.GetInstanceProfileAPIClient + organizations.ListAccountsAPIClient } type clientsCache struct { mtx sync.RWMutex config *SessionConfig + orgConfig *orgValidationConfig clients map[string]*cacheEntry newClient newClientCallback } @@ -32,7 +35,7 @@ type cacheEntry struct { client Client } -type newClientCallback func(ctx context.Context, config *SessionConfig, region string, assumeRoleARN string) (Client, error) +type newClientCallback func(ctx context.Context, config *SessionConfig, region string, assumeRoleARN string, orgRoleARN string) (Client, error) func newClientsCache(newClient newClientCallback) *clientsCache { return &clientsCache{ @@ -41,10 +44,11 @@ func newClientsCache(newClient newClientCallback) *clientsCache { } } -func (cc *clientsCache) configure(config SessionConfig) { +func (cc *clientsCache) configure(config SessionConfig, orgConfig orgValidationConfig) { cc.mtx.Lock() cc.clients = make(map[string]*cacheEntry) cc.config = &config + cc.orgConfig = &orgConfig cc.mtx.Unlock() } @@ -81,7 +85,13 @@ func (cc *clientsCache) getClient(ctx context.Context, region, accountID string) assumeRoleArn = fmt.Sprintf("arn:%s:iam::%s:role/%s", cc.config.Partition, accountID, cc.config.AssumeRole) } - client, err := cc.newClient(ctx, cc.config, region, assumeRoleArn) + // If organization attestation feature is enabled, assume org role + var orgRoleArn string + if cc.orgConfig.AccountRole != "" { + orgRoleArn = fmt.Sprintf("arn:%s:iam::%s:role/%s", cc.config.Partition, cc.orgConfig.AccountID, cc.orgConfig.AccountRole) + } + + client, err := cc.newClient(ctx, cc.config, region, assumeRoleArn, orgRoleArn) if err != nil { return nil, status.Errorf(codes.Internal, "failed to create client: %v", err) } @@ -103,16 +113,25 @@ func (cc *clientsCache) getCachedClient(cacheKey string) *cacheEntry { return r } -func newClient(ctx context.Context, config *SessionConfig, region string, assumeRoleARN string) (Client, error) { +func newClient(ctx context.Context, config *SessionConfig, region string, assumeRoleARN string, orgRoleArn string) (Client, error) { conf, err := newAWSConfig(ctx, config.AccessKeyID, config.SecretAccessKey, region, assumeRoleARN) if err != nil { return nil, err } + + // If the orgnizationAttestation feature is enabled, use the role configured for feature. + orgConf, err := newAWSConfig(ctx, config.AccessKeyID, config.SecretAccessKey, region, orgRoleArn) + if err != nil { + return nil, err + } + return struct { iam.GetInstanceProfileAPIClient ec2.DescribeInstancesAPIClient + organizations.ListAccountsAPIClient }{ GetInstanceProfileAPIClient: iam.NewFromConfig(conf), DescribeInstancesAPIClient: ec2.NewFromConfig(conf), + ListAccountsAPIClient: organizations.NewFromConfig(orgConf), }, nil } diff --git a/pkg/server/plugin/nodeattestor/awsiid/iid.go b/pkg/server/plugin/nodeattestor/awsiid/iid.go index 2f8b2c82c0..1f8405490b 100644 --- a/pkg/server/plugin/nodeattestor/awsiid/iid.go +++ b/pkg/server/plugin/nodeattestor/awsiid/iid.go @@ -100,6 +100,8 @@ type IIDAttestorPlugin struct { mtx sync.RWMutex clients *clientsCache + orgValidation *orgValidator + // test hooks hooks struct { getAWSCACertificate func(string, PublicKeyType) (*x509.Certificate, error) @@ -112,12 +114,13 @@ type IIDAttestorPlugin struct { // IIDAttestorConfig holds hcl configuration for IID attestor plugin type IIDAttestorConfig struct { SessionConfig `hcl:",squash"` - SkipBlockDevice bool `hcl:"skip_block_device"` - DisableInstanceProfileSelectors bool `hcl:"disable_instance_profile_selectors"` - LocalValidAcctIDs []string `hcl:"account_ids_for_local_validation"` - AgentPathTemplate string `hcl:"agent_path_template"` - AssumeRole string `hcl:"assume_role"` - Partition string `hcl:"partition"` + SkipBlockDevice bool `hcl:"skip_block_device"` + DisableInstanceProfileSelectors bool `hcl:"disable_instance_profile_selectors"` + LocalValidAcctIDs []string `hcl:"account_ids_for_local_validation"` + AgentPathTemplate string `hcl:"agent_path_template"` + AssumeRole string `hcl:"assume_role"` + Partition string `hcl:"partition"` + ValidateOrgAccountID *orgValidationConfig `hcl:"verify_organization"` pathTemplate *agentpathtemplate.Template trustDomain spiffeid.TrustDomain getAWSCACertificate func(string, PublicKeyType) (*x509.Certificate, error) @@ -126,6 +129,7 @@ type IIDAttestorConfig struct { // New creates a new IIDAttestorPlugin. func New() *IIDAttestorPlugin { p := &IIDAttestorPlugin{} + p.orgValidation = newOrganizationValidationBase(&orgValidationConfig{}) p.clients = newClientsCache(defaultNewClientCallback) p.hooks.getAWSCACertificate = getAWSCACertificate p.hooks.getenv = os.Getenv @@ -154,6 +158,26 @@ func (p *IIDAttestorPlugin) Attest(stream nodeattestorv1.NodeAttestor_AttestServ return err } + // Feature account belongs to organization + // Get the account id of the node from attestation and then check if respective account belongs to organization + if c.ValidateOrgAccountID != nil { + ctxValidateOrg, cancel := context.WithTimeout(stream.Context(), awsTimeout) + defer cancel() + orgClient, err := p.clients.getClient(ctxValidateOrg, c.ValidateOrgAccountID.AccountRegion, c.ValidateOrgAccountID.AccountID) + if err != nil { + return status.Errorf(codes.Internal, "failed to get org client: %v", err) + } + + valid, err := p.orgValidation.IsMemberAccount(ctxValidateOrg, orgClient, attestationData.AccountID) + if err != nil { + return status.Errorf(codes.Internal, "failed aws ec2 attestation, issue while verifying if nodes account id: %v belong to org: %v", attestationData.AccountID, err) + } + + if !valid { + return status.Errorf(codes.Internal, "failed aws ec2 attestation, nodes account id: %v is not part of configured organization or doesn't have ACTIVE status", attestationData.AccountID) + } + } + inTrustAcctList := false for _, id := range c.LocalValidAcctIDs { if attestationData.AccountID == id { @@ -272,11 +296,27 @@ func (p *IIDAttestorPlugin) Configure(_ context.Context, req *configv1.Configure return nil, status.Errorf(codes.InvalidArgument, "invalid partition %q, must be one of: %v", config.Partition, partitions) } + // Check if Feature flag for account belongs to organization is enabled. + orgConfig := &orgValidationConfig{} + if config.ValidateOrgAccountID != nil { + err = validateOrganizationConfig(config) + if err != nil { + return nil, err + } + orgConfig = config.ValidateOrgAccountID + } + p.mtx.Lock() defer p.mtx.Unlock() p.config = config - p.clients.configure(config.SessionConfig) + p.clients.configure(config.SessionConfig, *orgConfig) + if config.ValidateOrgAccountID != nil { + // Setup required config, for validation and for bootstrapping org client + if err := p.orgValidation.configure(orgConfig); err != nil { + return nil, err + } + } return &configv1.ConfigureResponse{}, nil } @@ -284,6 +324,7 @@ func (p *IIDAttestorPlugin) Configure(_ context.Context, req *configv1.Configure // SetLogger sets this plugin's logger func (p *IIDAttestorPlugin) SetLogger(log hclog.Logger) { p.log = log + p.orgValidation.setLogger(log) } func (p *IIDAttestorPlugin) checkBlockDevice(instance ec2types.Instance) error { @@ -558,3 +599,38 @@ func isValidAWSPartition(partition string) bool { } return false } + +func validateOrganizationConfig(config *IIDAttestorConfig) error { + checkAccID := config.ValidateOrgAccountID.AccountID + checkAccRole := config.ValidateOrgAccountID.AccountRole + checkAccRegion := config.ValidateOrgAccountID.AccountRegion + + if checkAccID == "" || checkAccRole == "" { + return status.Errorf(codes.InvalidArgument, "please ensure that %q & %q are present inside block or remove the block: %q for feature node attestation using account id verification", orgAccountID, orgAccountRole, "verify_organization") + } + + if checkAccRegion == "" { + config.ValidateOrgAccountID.AccountRegion = orgDefaultAccRegion + } + + // check TTL if specified + ttl := orgAccountDefaultListDuration + checkTTL := config.ValidateOrgAccountID.AccountListTTL + if checkTTL != "" { + t, err := time.ParseDuration(checkTTL) + if err != nil { + return status.Errorf(codes.InvalidArgument, "please ensure that %q if configured, it should be in duration and is suffixed with required 'm' for time duration in minute ex. '5m'. Otherwise, remove the: %q, in the block: %q. Default TTL will be: %q", orgAccountListTTL, orgAccountListTTL, "verify_organization", orgAccountDefaultListTTL) + } + + if t.Minutes() < orgAccountMinTTL.Minutes() { + return status.Errorf(codes.InvalidArgument, "please ensure that %q if configured, it should be greater than or equal to %q. Otherwise remove the: %q, in the block: %q. Default TTL will be: %q", orgAccountListTTL, orgAccountMinListTTL, orgAccountListTTL, "verify_organization", orgAccountDefaultListTTL) + } + + ttl = t + } + + // Assign default ttl if ttl doesnt exist. + config.ValidateOrgAccountID.AccountListTTL = ttl.String() + + return nil +} diff --git a/pkg/server/plugin/nodeattestor/awsiid/iid_test.go b/pkg/server/plugin/nodeattestor/awsiid/iid_test.go index db43eaf87f..14c78e6898 100644 --- a/pkg/server/plugin/nodeattestor/awsiid/iid_test.go +++ b/pkg/server/plugin/nodeattestor/awsiid/iid_test.go @@ -22,6 +22,8 @@ import ( ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/aws/aws-sdk-go-v2/service/iam" iamtypes "github.com/aws/aws-sdk-go-v2/service/iam/types" + "github.com/aws/aws-sdk-go-v2/service/organizations" + "github.com/aws/aws-sdk-go-v2/service/organizations/types" "github.com/fullsailor/pkcs7" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" @@ -54,6 +56,7 @@ var ( testAvailabilityZone = "test-az" testImageID = "test-image-id" testProfile = "test-profile" + testAccountID = "123456789" zeroDeviceIndex = int32(0) nonzeroDeviceIndex = int32(1) instanceStoreType = ec2types.DeviceTypeInstanceStore @@ -78,6 +81,8 @@ func TestAttest(t *testing.T) { describeInstancesError error mutateGetInstanceProfileOutput func(output *iam.GetInstanceProfileOutput) getInstanceProfileError error + mutateListAccountOutput func(output *organizations.ListAccountsOutput) + listOrgAccountError error overrideAttestationData func(caws.IIDAttestationData) caws.IIDAttestationData overridePayload func() []byte expectCode codes.Code @@ -390,6 +395,66 @@ func TestAttest(t *testing.T) { {Type: caws.PluginName, Value: "tag:Hostname:host1"}, }, }, + { + name: "fail with account id not belonging to organization", // Default attestation data already has different account id + config: `verify_organization = { management_account_id = "12345" assume_org_role = "test-orgrole" management_account_region = "test-region"}`, + expectCode: codes.Internal, + expectMsgPrefix: fmt.Sprintf("nodeattestor(aws_iid): failed aws ec2 attestation, nodes account id: %v is not part of configured organization or doesn't have ACTIVE status", testAccount), + }, + { + name: "fail call for organization list account", + config: `verify_organization = { management_account_id = "12345" assume_org_role = "test-orgrole" management_account_region = "test-region"}`, + expectCode: codes.Internal, + listOrgAccountError: errors.New("oh no"), + expectMsgPrefix: fmt.Sprintf("nodeattestor(aws_iid): failed aws ec2 attestation, issue while verifying if nodes account id: %v belong to org: %v", testAccount, "issue while getting list of accounts"), + }, + { + name: "fail for account id with not ACTIVE status in organization list", + config: `verify_organization = { management_account_id = "12345" assume_org_role = "test-orgrole" management_account_region = "test-orgregion" }`, + expectCode: codes.Internal, + mutateListAccountOutput: func(output *organizations.ListAccountsOutput) { + output.Accounts = []types.Account{{ + Id: &testAccountID, + Status: types.AccountStatusSuspended, + }} + }, + overrideAttestationData: func(id caws.IIDAttestationData) caws.IIDAttestationData { + doc := imds.InstanceIdentityDocument{ + AccountID: testAccountID, + InstanceID: testInstance, + Region: testRegion, + AvailabilityZone: testAvailabilityZone, + ImageID: testImageID, + } + docBytes, _ := json.Marshal(doc) + id.Document = string(docBytes) + return id + }, + expectMsgPrefix: fmt.Sprintf("nodeattestor(aws_iid): failed aws ec2 attestation, nodes account id: %v is not part of configured organization or doesn't have ACTIVE status", testAccountID), + }, + { + name: "success when organization validation feature is turned on", + config: `verify_organization = { management_account_id = "12345" assume_org_role = "test-orgrole" management_account_region = "test-orgregion" }`, + overrideAttestationData: func(id caws.IIDAttestationData) caws.IIDAttestationData { + doc := imds.InstanceIdentityDocument{ + AccountID: testAccountID, + InstanceID: testInstance, + Region: testRegion, + AvailabilityZone: testAvailabilityZone, + ImageID: testImageID, + } + docBytes, _ := json.Marshal(doc) + id.Document = string(docBytes) + return id + }, + expectID: "spiffe://example.org/spire/agent/aws_iid/123456789/test-region/test-instance", + expectSelectors: []*common.Selector{ + {Type: caws.PluginName, Value: "az:test-az"}, + {Type: caws.PluginName, Value: "image:id:test-image-id"}, + {Type: caws.PluginName, Value: "instance:id:test-instance"}, + {Type: caws.PluginName, Value: "region:test-region"}, + }, + }, } { t.Run(tt.name, func(t *testing.T) { client := newFakeClient() @@ -401,6 +466,10 @@ func TestAttest(t *testing.T) { if tt.mutateGetInstanceProfileOutput != nil { tt.mutateGetInstanceProfileOutput(client.GetInstanceProfileOutput) } + client.ListAccountError = tt.listOrgAccountError + if tt.mutateListAccountOutput != nil { + tt.mutateListAccountOutput(client.ListAccountOutput) + } agentStore := fakeagentstore.New() if tt.alreadyAttested { @@ -435,7 +504,7 @@ func TestAttest(t *testing.T) { return testAWSCACert, nil } - attestor.clients = newClientsCache(func(ctx context.Context, config *SessionConfig, region string, assumeRoleARN string) (Client, error) { + attestor.clients = newClientsCache(func(ctx context.Context, config *SessionConfig, region string, assumeRoleARN string, orgRoleArn string) (Client, error) { return client, nil }) @@ -545,6 +614,40 @@ func TestConfigure(t *testing.T) { err := doConfig(t, coreConfig, ``) require.NoError(t, err) }) + + orgVerificationFeatureErr := fmt.Errorf("make %v, %v & %v are present inside block : %v for feature node attestation using account id verification", "verify_organization", orgAccountID, orgAccountRole, orgAccRegion) + orgVerificationFeatureTTLErr := fmt.Errorf("make %v if configured, should be in hours and is suffix with required `h` for time duration in hour ex. 1h. or remove the : %v, in the block : %v. Default TTL will be : %v, for feature node attestation using account id verification", orgAccountListTTL, orgAccountListTTL, "verify_organization", orgAccountDefaultListTTL) + orgVerificationFeatureMinTTLErr := fmt.Errorf("make %v if configured, should be more than >= %v. or remove the : %v, in the block : %v. Default TTL will be : %v, for feature node attestation using account id verification", orgAccountListTTL, orgAccountMinListTTL, orgAccountListTTL, "verify_organization", orgAccountDefaultListTTL) + + t.Run("fail, account belongs to org, if params are not specified and feature enabled", func(t *testing.T) { + err := doConfig(t, coreConfig, `verify_organization = {}`) + require.Error(t, err, orgVerificationFeatureErr) + }) + + t.Run("fail, account belongs to org, if only account id is specified, roles & region are not specified", func(t *testing.T) { + err := doConfig(t, coreConfig, `verify_organization = { management_account_id = "dummy_account" }`) + require.Error(t, err, orgVerificationFeatureErr) + }) + + t.Run("fail, account belongs to org, if ttl is not specified in proper format", func(t *testing.T) { + err := doConfig(t, coreConfig, `verify_organization = { management_account_id = "dummy_account" assume_org_role = "dummy_role" org_account_map_ttl = "2" }`) + require.Error(t, err, orgVerificationFeatureTTLErr) + }) + + t.Run("fail, account belongs to org, if ttl is specified and is less than min ttl required", func(t *testing.T) { + err := doConfig(t, coreConfig, `verify_organization = { management_account_id = "dummy_account" assume_org_role = "dummy_role" org_account_map_ttl = "30s" }`) + require.Error(t, err, orgVerificationFeatureMinTTLErr) + }) + + t.Run("success, verify_organization featured enabled with required params", func(t *testing.T) { + err := doConfig(t, coreConfig, `verify_organization = { management_account_id = "dummy_account" assume_org_role = "dummy_role" }`) + require.NoError(t, err) + }) + + t.Run("success, verify_organization featured enabled with all params", func(t *testing.T) { + err := doConfig(t, coreConfig, `verify_organization = { management_account_id = "dummy_account" assume_org_role = "dummy_role" org_account_map_ttl = "1m30s" }`) + require.NoError(t, err) + }) } func TestInstanceProfileArnParsing(t *testing.T) { @@ -572,6 +675,8 @@ type fakeClient struct { DescribeInstancesError error GetInstanceProfileOutput *iam.GetInstanceProfileOutput GetInstanceProfileError error + ListAccountOutput *organizations.ListAccountsOutput + ListAccountError error } func newFakeClient() *fakeClient { @@ -595,6 +700,7 @@ func newFakeClient() *fakeClient { }, }, GetInstanceProfileOutput: &iam.GetInstanceProfileOutput{}, + ListAccountOutput: &organizations.ListAccountsOutput{}, } } @@ -619,6 +725,22 @@ func (c *fakeClient) GetInstanceProfile(_ context.Context, input *iam.GetInstanc return c.GetInstanceProfileOutput, c.GetInstanceProfileError } +func (c *fakeClient) ListAccounts(_ context.Context, input *organizations.ListAccountsInput, _ ...func(*organizations.Options)) (*organizations.ListAccountsOutput, error) { + // Only modify the output if its not being mutated in test for : mutateListAccountOutput. + if c.ListAccountOutput.Accounts == nil { + c.ListAccountOutput = &organizations.ListAccountsOutput{ + Accounts: []types.Account{{ + Id: &testAccountID, + Status: types.AccountStatusActive, + }}, + } + } + if input.NextToken != nil { + return nil, fmt.Errorf("failing request for pagination") + } + return c.ListAccountOutput, c.ListAccountError +} + func buildAttestationDataRSA2048Signature(t *testing.T) caws.IIDAttestationData { // doc body doc := imds.InstanceIdentityDocument{ diff --git a/pkg/server/plugin/nodeattestor/awsiid/organization.go b/pkg/server/plugin/nodeattestor/awsiid/organization.go new file mode 100644 index 0000000000..720bd4d752 --- /dev/null +++ b/pkg/server/plugin/nodeattestor/awsiid/organization.go @@ -0,0 +1,254 @@ +package awsiid + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/andres-erbsen/clock" + "github.com/aws/aws-sdk-go-v2/service/organizations" + "github.com/aws/aws-sdk-go-v2/service/organizations/types" + "github.com/hashicorp/go-hclog" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +const ( + orgAccountID = "management_account_id" + orgAccountRole = "assume_org_role" + orgAccRegion = "management_account_region" // required for cache key + orgAccountStatus = "ACTIVE" // Only allow node account id's with status ACTIVE + orgAccountListTTL = "org_account_map_ttl" // Cache the list of account for specific time, if not sent default will be used. + orgAccountDefaultListTTL = "3m" // pull account list after 3 minutes + orgAccountMinListTTL = "1m" // Minimum TTL configuration to pull the org account list + orgAccountRetries = 5 + orgDefaultAccRegion = "us-west-2" +) + +var ( + orgAccountDefaultListDuration, _ = time.ParseDuration(orgAccountDefaultListTTL) + orgAccountMinTTL, _ = time.ParseDuration(orgAccountMinListTTL) +) + +type orgValidationConfig struct { + AccountID string `hcl:"management_account_id"` + AccountRole string `hcl:"assume_org_role"` + AccountRegion string `hcl:"management_account_region"` + AccountListTTL string `hcl:"org_account_map_ttl"` +} + +type orgValidator struct { + orgAccountList map[string]any + orgAccountListValidDuration time.Time + orgConfig *orgValidationConfig + mutex sync.RWMutex + // orgAccountListCacheTTL holds the cache ttl from configuration; otherwise, it will be set to the default value. + orgAccountListCacheTTL time.Duration + log hclog.Logger + // retries fix number of retries before ttl is expired. + retries int + // require for testing + clk clock.Clock +} + +func newOrganizationValidationBase(config *orgValidationConfig) *orgValidator { + client := &orgValidator{ + orgAccountList: make(map[string]any), + orgConfig: config, + retries: orgAccountRetries, + clk: clock.New(), + } + + return client +} + +func (o *orgValidator) getRetries() int { + o.mutex.RLock() + defer o.mutex.RUnlock() + return o.retries +} + +func (o *orgValidator) decrRetries() int { + o.mutex.Lock() + defer o.mutex.Unlock() + if o.retries > 0 { + o.retries-- + } + + return o.retries +} + +func (o *orgValidator) configure(config *orgValidationConfig) error { + o.mutex.Lock() + defer o.mutex.Unlock() + + o.orgConfig = config + + // While doing configuration invalidate the map so we dont keep using old one. + o.orgAccountList = make(map[string]any) + o.retries = orgAccountRetries + + t, err := time.ParseDuration(config.AccountListTTL) + if err != nil { + return status.Errorf(codes.InvalidArgument, "issue while parsing ttl for organization, while configuring orgnization validation: %v", err) + } + + o.orgAccountListCacheTTL = t + + return nil +} + +func (o *orgValidator) setLogger(log hclog.Logger) { + o.log = log +} + +// IsMemberAccount method checks if the Account ID attached on the node is part of the organisation. +// If it part of the organisation then validation should be succesfull if not attestation should fail, on enabling this verification method. +// This could be alternative for not explicitly maintaining allowed list of account ids. +// Method pulls the list of accounts from organization and caches it for certain time, cache time can be configured. +func (o *orgValidator) IsMemberAccount(ctx context.Context, orgClient organizations.ListAccountsAPIClient, accountIDOfNode string) (bool, error) { + reValidatedCache, err := o.validateCache(ctx, orgClient) + if err != nil { + return false, err + } + + accountIsmemberOfOrg, err := o.lookupCache(ctx, orgClient, accountIDOfNode, reValidatedCache) + if err != nil { + return false, err + } + + return accountIsmemberOfOrg, nil +} + +// validateCache validates cache and refresh if its stale +func (o *orgValidator) validateCache(ctx context.Context, orgClient organizations.ListAccountsAPIClient) (bool, error) { + isStale := o.checkIfOrgAccountListIsStale() + if !isStale { + return false, nil + } + + // cache is stale, reload the account map + _, err := o.reloadAccountList(ctx, orgClient, false) + if err != nil { + return false, err + } + + return true, nil +} + +func (o *orgValidator) lookupCache(ctx context.Context, orgClient organizations.ListAccountsAPIClient, accountIDOfNode string, reValidatedCache bool) (bool, error) { + o.mutex.RLock() + orgAccountList := o.orgAccountList + o.mutex.RUnlock() + + _, accoutIsmemberOfOrg := orgAccountList[accountIDOfNode] + + // Retry if it doesn't exist in cache and cache was not revalidated + if !accoutIsmemberOfOrg && !reValidatedCache { + orgAccountList, err := o.refreshCache(ctx, orgClient) + if err != nil { + o.log.Error("Failed to refesh cache, while validating account id: %v", accountIDOfNode, "error", err.Error()) + return false, err + } + _, accoutIsmemberOfOrg = orgAccountList[accountIDOfNode] + } + + return accoutIsmemberOfOrg, nil +} + +// refreshCache refreshes list with new cache if cache miss happens and check if element exist +func (o *orgValidator) refreshCache(ctx context.Context, orgClient organizations.ListAccountsAPIClient) (map[string]any, error) { + remTries := o.getRetries() + + orgAccountList := make(map[string]any) + if remTries <= 0 { + return orgAccountList, nil + } + + orgAccountList, err := o.reloadAccountList(ctx, orgClient, true) + if err != nil { + return nil, err + } + + o.decrRetries() + + return orgAccountList, nil +} + +// checkIfOrgAccountListIsStale checks if the cached org account list is stale. +func (o *orgValidator) checkIfOrgAccountListIsStale() bool { + o.mutex.RLock() + defer o.mutex.RUnlock() + + // Map is empty that means this is first time plugin is being initialised + if len(o.orgAccountList) == 0 { + return true + } + + return o.checkIfTTLIsExpired(o.orgAccountListValidDuration) +} + +// reloadAccountList gets the list of accounts belonging to organization and catch them +func (o *orgValidator) reloadAccountList(ctx context.Context, orgClient organizations.ListAccountsAPIClient, catchBurst bool) (map[string]any, error) { + o.mutex.Lock() + defer o.mutex.Unlock() + + // Make sure: we are not doing cache burst and account map is not updated recently from different go routine. + if !catchBurst && len(o.orgAccountList) != 0 && !o.checkIfTTLIsExpired(o.orgAccountListValidDuration) { + return o.orgAccountList, nil + } + + // Avoid if other thread has already updated the map + if catchBurst && o.retries == 0 { + return o.orgAccountList, nil + } + + // Get the list of accounts + listAccountsOp, err := orgClient.ListAccounts(ctx, &organizations.ListAccountsInput{}) + if err != nil { + return nil, fmt.Errorf("issue while getting list of accounts: %w", err) + } + + // Build new org accounts list + orgAccountsMap := make(map[string]any) + + // Update the org account list cache with ACTIVE accounts & handle pagination + for { + for _, acc := range listAccountsOp.Accounts { + if acc.Status == types.AccountStatusActive { + accID := *acc.Id + orgAccountsMap[accID] = struct{}{} + } + } + + if listAccountsOp.NextToken == nil { + break + } + + listAccountsOp, err = orgClient.ListAccounts(ctx, &organizations.ListAccountsInput{ + NextToken: listAccountsOp.NextToken, + }) + if err != nil { + return nil, fmt.Errorf("issue while getting list of accounts in pagination: %w", err) + } + } + + // Update timestamp, if it was not invoked as part of cache miss. + if !catchBurst { + o.orgAccountListValidDuration = o.clk.Now().UTC().Add(o.orgAccountListCacheTTL) + // Also reset the retries + o.retries = orgAccountRetries + } + + // Overwrite the cache/list + o.orgAccountList = orgAccountsMap + + return o.orgAccountList, nil +} + +// checkIFTTLIsExpire check if the creation time is pass defined ttl +func (o *orgValidator) checkIfTTLIsExpired(ttl time.Time) bool { + currTimeStamp := o.clk.Now().UTC() + return currTimeStamp.After(ttl) +} diff --git a/pkg/server/plugin/nodeattestor/awsiid/organization_test.go b/pkg/server/plugin/nodeattestor/awsiid/organization_test.go new file mode 100644 index 0000000000..f40c361d77 --- /dev/null +++ b/pkg/server/plugin/nodeattestor/awsiid/organization_test.go @@ -0,0 +1,140 @@ +package awsiid + +import ( + "context" + "testing" + "time" + + "github.com/andres-erbsen/clock" + "github.com/aws/aws-sdk-go-v2/service/organizations" + "github.com/aws/aws-sdk-go-v2/service/organizations/types" + "github.com/stretchr/testify/require" +) + +const ( + testAccountListTTL = "1m" + testClockMutAfter = "after" + testClockMutBefore = "before" +) + +func TestIsMemberAccount(t *testing.T) { + testOrgValidator := buildOrgValidationClient() + testClient := newFakeClient() + + // pass valid account + ok, err := testOrgValidator.IsMemberAccount(context.Background(), testClient, testAccountID) + require.NoError(t, err) + require.Equal(t, ok, true) + + // fail valid account doesnt exist + ok, err = testOrgValidator.IsMemberAccount(context.Background(), testClient, "9999999") + require.NoError(t, err) + require.Equal(t, ok, false) +} + +func TestCheckIfOrgAccountListIsStale(t *testing.T) { + testOrgValidator := buildOrgValidationClient() + + testIsStale := testOrgValidator.checkIfOrgAccountListIsStale() + require.True(t, testIsStale) + + // seed account list and it should return false + _, err := testOrgValidator.reloadAccountList(context.Background(), newFakeClient(), false) + require.NoError(t, err) + testIsStale = testOrgValidator.checkIfOrgAccountListIsStale() + require.False(t, testIsStale) +} + +func TestReloadAccountList(t *testing.T) { + testOrgValidator := buildOrgValidationClient() + testClient := newFakeClient() + + // check once config is provided correctly and catchburst is false, account list popped up along with timestamp + _, err := testOrgValidator.reloadAccountList(context.Background(), testClient, false) + require.NoError(t, err) + require.Len(t, testOrgValidator.orgAccountList, 1) + require.Greater(t, testOrgValidator.orgAccountListValidDuration, time.Now()) + require.Equal(t, testOrgValidator.retries, orgAccountRetries) + + // check if the list of accounts is updated when catchburst is true + // but the timestamp is not updated + existingValidDuration := testOrgValidator.orgAccountListValidDuration + testOrgValidator.orgAccountList = make(map[string]any) + _, err = testOrgValidator.reloadAccountList(context.Background(), testClient, true) + require.NoError(t, err) + require.Equal(t, existingValidDuration, testOrgValidator.orgAccountListValidDuration) + require.Len(t, testOrgValidator.orgAccountList, 1) + + // set retry to 0 and make sure the list is not updated + testOrgValidator.retries = 0 + testOrgValidator.orgAccountList = make(map[string]any) + _, err = testOrgValidator.reloadAccountList(context.Background(), testClient, true) + require.NoError(t, err) + require.Empty(t, testOrgValidator.orgAccountList) + + // make sure retry is reset, once we are over TTL + // move clock ahead by 10 minutes. And as our TTL is 1 minute, it should refresh + // the list + testOrgValidator = buildOrgValidationClient() + _, err = testOrgValidator.reloadAccountList(context.Background(), testClient, false) + require.NoError(t, err) + require.Len(t, testOrgValidator.orgAccountList, 1) + testOrgValidator.clk = buildNewMockClock(10*time.Minute, testClockMutAfter) + testOrgValidator.retries = 0 // trigger refresh to reset retries + require.Equal(t, testOrgValidator.retries, 0) + _, err = testOrgValidator.reloadAccountList(context.Background(), testClient, false) + require.NoError(t, err) + require.Equal(t, testOrgValidator.retries, orgAccountRetries) + + // make sure errors is handled when list accounts call fails + // while making subsequent calls + testOrgValidator = buildOrgValidationClient() + testToken := "uncooolrandomtoken" + testClient.ListAccountOutput = &organizations.ListAccountsOutput{ + Accounts: []types.Account{{ + Id: &testAccountID, + Status: types.AccountStatusActive, + }}, + NextToken: &testToken, + } + _, err = testOrgValidator.reloadAccountList(context.Background(), testClient, false) + require.ErrorContains(t, err, "issue while getting list of accounts") +} + +func TestCheckIfTTLIsExpired(t *testing.T) { + testOrgValidator := buildOrgValidationClient() + + // expect not expired, move clock back by 10 minutes + testOrgValidator.clk = buildNewMockClock(10*time.Minute, testClockMutBefore) + expired := testOrgValidator.checkIfTTLIsExpired(time.Now()) + require.False(t, expired) + + // expect expired, move clock forward by 10 minute + testOrgValidator.clk = buildNewMockClock(10*time.Minute, testClockMutAfter) + expired = testOrgValidator.checkIfTTLIsExpired(time.Now()) + require.True(t, expired) +} + +func buildOrgValidationClient() *orgValidator { + testOrgValidationConfig := &orgValidationConfig{ + AccountID: testAccountID, + AccountRole: testProfile, + AccountRegion: testRegion, + AccountListTTL: testAccountListTTL, + } + testOrgValidator := newOrganizationValidationBase(testOrgValidationConfig) + _ = testOrgValidator.configure(testOrgValidationConfig) + return testOrgValidator +} + +func buildNewMockClock(t time.Duration, mut string) *clock.Mock { + testClock := clock.NewMock() + switch mut := mut; mut { + case testClockMutAfter: + testClock.Set(time.Now().UTC()) + testClock.Add(t) + case testClockMutBefore: + testClock.Set(time.Now().UTC().Add(-t)) + } + return testClock +}