diff --git a/config/src/main/java/ai/asserts/aws/config/ScrapeConfig.java b/config/src/main/java/ai/asserts/aws/config/ScrapeConfig.java index c5f31cfd..bb76164e 100644 --- a/config/src/main/java/ai/asserts/aws/config/ScrapeConfig.java +++ b/config/src/main/java/ai/asserts/aws/config/ScrapeConfig.java @@ -69,6 +69,9 @@ public class ScrapeConfig { @Builder.Default private boolean discoverOnlySubnetTasks = false; + @Builder.Default + private boolean fetchEC2Metadata = false; + @Builder.Default private Map primaryExporterByAccount = new TreeMap<>(); diff --git a/src/main/java/ai/asserts/aws/exporter/EC2ToEBSVolumeExporter.java b/src/main/java/ai/asserts/aws/exporter/EC2ToEBSVolumeExporter.java index da9c1f7a..8ab3def2 100644 --- a/src/main/java/ai/asserts/aws/exporter/EC2ToEBSVolumeExporter.java +++ b/src/main/java/ai/asserts/aws/exporter/EC2ToEBSVolumeExporter.java @@ -4,9 +4,9 @@ */ package ai.asserts.aws.exporter; +import ai.asserts.aws.AWSApiCallRateLimiter; import ai.asserts.aws.AWSClientProvider; import ai.asserts.aws.CollectionBuilderTask; -import ai.asserts.aws.AWSApiCallRateLimiter; import ai.asserts.aws.ScrapeConfigProvider; import ai.asserts.aws.TagUtil; import ai.asserts.aws.TaskExecutorUtil; @@ -32,6 +32,7 @@ import software.amazon.awssdk.services.resourcegroupstaggingapi.model.Tag; import java.util.ArrayList; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -69,7 +70,8 @@ public class EC2ToEBSVolumeExporter extends Collector implements MetricProvider, public EC2ToEBSVolumeExporter(AccountProvider accountProvider, AWSClientProvider awsClientProvider, MetricSampleBuilder metricSampleBuilder, - CollectorRegistry collectorRegistry, AWSApiCallRateLimiter rateLimiter, TagUtil tagUtil, + CollectorRegistry collectorRegistry, AWSApiCallRateLimiter rateLimiter, + TagUtil tagUtil, ECSServiceDiscoveryExporter ecsServiceDiscoveryExporter, TaskExecutorUtil taskExecutorUtil, ScrapeConfigProvider scrapeConfigProvider) { this.accountProvider = accountProvider; @@ -108,12 +110,13 @@ public List call() { } })); volumeFutures.add( - taskExecutorUtil.executeTenantTask(awsAccount.getTenant(), new CollectionBuilderTask() { - @Override - public List call() { - return buildResourceRelations(awsAccount, region); - } - })); + taskExecutorUtil.executeTenantTask(awsAccount.getTenant(), + new CollectionBuilderTask() { + @Override + public List call() { + return buildResourceRelations(awsAccount, region); + } + })); })); taskExecutorUtil.awaitAll(futures, allSamples::addAll); taskExecutorUtil.awaitAll(volumeFutures, newAttachedVolumes::addAll); @@ -126,9 +129,15 @@ public List call() { } private List buildResourceRelations(AWSAccount awsAccount, String region) { + ScrapeConfig scrapeConfig = scrapeConfigProvider.getScrapeConfig(awsAccount.getTenant()); + if (!scrapeConfig.isFetchEC2Metadata()) { + log.info("Skipping EC2 Metadata fetch"); + return Collections.emptyList(); + } + + Set newAttachedVolumes = new HashSet<>(); String accountId = awsAccount.getAccountId(); Ec2Client ec2Client = awsClientProvider.getEc2Client(region, awsAccount); - Set newAttachedVolumes = new HashSet<>(); try { AtomicReference nextToken = new AtomicReference<>(); do { @@ -179,9 +188,12 @@ private List buildResourceRelations(AWSAccount awsAccount, Str private List buildEC2InstanceMetrics(String region, AWSAccount awsAccount) { List samples = new ArrayList<>(); - - Ec2Client ec2Client = awsClientProvider.getEc2Client(region, awsAccount); ScrapeConfig scrapeConfig = scrapeConfigProvider.getScrapeConfig(awsAccount.getTenant()); + if (!scrapeConfig.isFetchEC2Metadata()) { + log.info("Skipping EC2 Metadata fetch"); + return samples; + } + Ec2Client ec2Client = awsClientProvider.getEc2Client(region, awsAccount); String accountId = awsAccount.getAccountId(); SortedMap telemetryLabels = new TreeMap<>(); telemetryLabels.put(SCRAPE_REGION_LABEL, region); diff --git a/src/main/java/ai/asserts/aws/exporter/ECSTaskUtil.java b/src/main/java/ai/asserts/aws/exporter/ECSTaskUtil.java index d8d827f4..5693112a 100644 --- a/src/main/java/ai/asserts/aws/exporter/ECSTaskUtil.java +++ b/src/main/java/ai/asserts/aws/exporter/ECSTaskUtil.java @@ -6,6 +6,7 @@ import ai.asserts.aws.AWSApiCallRateLimiter; import ai.asserts.aws.AWSClientProvider; +import ai.asserts.aws.ScrapeConfigProvider; import ai.asserts.aws.TagUtil; import ai.asserts.aws.TaskExecutorUtil; import ai.asserts.aws.account.AWSAccount; @@ -60,6 +61,8 @@ public class ECSTaskUtil { private final AWSApiCallRateLimiter rateLimiter; private final TagUtil tagUtil; private final TaskExecutorUtil taskExecutorUtil; + + private final ScrapeConfigProvider scrapeConfigProvider; private final String envName; public final Cache taskDefsByARN = CacheBuilder.newBuilder() @@ -77,12 +80,14 @@ public class ECSTaskUtil { public ECSTaskUtil(AWSClientProvider awsClientProvider, ResourceMapper resourceMapper, AWSApiCallRateLimiter rateLimiter, - TagUtil tagUtil, TaskExecutorUtil taskExecutorUtil) { + TagUtil tagUtil, TaskExecutorUtil taskExecutorUtil, + ScrapeConfigProvider scrapeConfigProvider) { this.awsClientProvider = awsClientProvider; this.resourceMapper = resourceMapper; this.rateLimiter = rateLimiter; this.tagUtil = tagUtil; this.taskExecutorUtil = taskExecutorUtil; + this.scrapeConfigProvider = scrapeConfigProvider; // If the exporter's environment name is marked, use this for ECS metrics envName = getInstallEnvName(); } @@ -322,21 +327,26 @@ private SubnetDetails getSubnetDetails(Task task, Resource taskResource) { } private String getVpcId(Resource taskResource, AtomicReference subnetId) { - AtomicReference id = new AtomicReference<>(""); - Ec2Client ec2Client = awsClientProvider.getEc2Client(taskResource.getRegion(), - AWSAccount.builder() - .accountId(taskResource.getAccount()) - .build()); - DescribeSubnetsResponse r = rateLimiter.doWithRateLimit("EC2Client/describeSubnets", - ImmutableSortedMap.of( - SCRAPE_ACCOUNT_ID_LABEL, taskResource.getAccount(), - SCRAPE_REGION_LABEL, taskResource.getRegion(), - SCRAPE_OPERATION_LABEL, "EC2Client/describeSubnets" - ), - () -> ec2Client.describeSubnets(DescribeSubnetsRequest.builder() - .subnetIds(subnetId.get()) - .build())); - r.subnets().stream().findFirst().ifPresent(subnet -> id.set(subnet.vpcId())); - return id.get(); + ScrapeConfig scrapeConfig = scrapeConfigProvider.getScrapeConfig(taskResource.getTenant()); + if (scrapeConfig.isFetchEC2Metadata()) { + AtomicReference id = new AtomicReference<>(""); + Ec2Client ec2Client = awsClientProvider.getEc2Client(taskResource.getRegion(), + AWSAccount.builder() + .accountId(taskResource.getAccount()) + .build()); + DescribeSubnetsResponse r = rateLimiter.doWithRateLimit("EC2Client/describeSubnets", + ImmutableSortedMap.of( + SCRAPE_ACCOUNT_ID_LABEL, taskResource.getAccount(), + SCRAPE_REGION_LABEL, taskResource.getRegion(), + SCRAPE_OPERATION_LABEL, "EC2Client/describeSubnets" + ), + () -> ec2Client.describeSubnets(DescribeSubnetsRequest.builder() + .subnetIds(subnetId.get()) + .build())); + r.subnets().stream().findFirst().ifPresent(subnet -> id.set(subnet.vpcId())); + return id.get(); + } else { + return ""; + } } } diff --git a/src/test/java/ai/asserts/aws/exporter/EC2ToEBSVolumeExporterTest.java b/src/test/java/ai/asserts/aws/exporter/EC2ToEBSVolumeExporterTest.java index 8b721dd1..c4af3397 100644 --- a/src/test/java/ai/asserts/aws/exporter/EC2ToEBSVolumeExporterTest.java +++ b/src/test/java/ai/asserts/aws/exporter/EC2ToEBSVolumeExporterTest.java @@ -4,14 +4,14 @@ */ package ai.asserts.aws.exporter; +import ai.asserts.aws.AWSApiCallRateLimiter; import ai.asserts.aws.AWSClientProvider; import ai.asserts.aws.ScrapeConfigProvider; +import ai.asserts.aws.TagUtil; import ai.asserts.aws.TaskExecutorUtil; import ai.asserts.aws.TestTaskThreadPool; -import ai.asserts.aws.account.AccountProvider; import ai.asserts.aws.account.AWSAccount; -import ai.asserts.aws.AWSApiCallRateLimiter; -import ai.asserts.aws.TagUtil; +import ai.asserts.aws.account.AccountProvider; import ai.asserts.aws.config.ScrapeConfig; import ai.asserts.aws.resource.Resource; import ai.asserts.aws.resource.ResourceRelation; @@ -103,8 +103,8 @@ public void afterPropertiesSet() throws Exception { public void updateCollect() { expect(accountProvider.getAccounts()).andReturn(ImmutableSet.of(account)); expect(awsClientProvider.getEc2Client("region", account)).andReturn(ec2Client).anyTimes(); - expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig); - + expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig).anyTimes(); + expect(scrapeConfig.isFetchEC2Metadata()).andReturn(true).anyTimes(); DescribeInstancesRequest request = DescribeInstancesRequest.builder() .filters(Filter.builder() .name("vpc-id") @@ -232,4 +232,17 @@ public void updateCollect() { assertEquals(ImmutableList.of(metricFamilySamples), testClass.collect()); verifyAll(); } + + @Test + public void updateCollect_skipEC2MetaFetch() { + expect(accountProvider.getAccounts()).andReturn(ImmutableSet.of(account)); + expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig).anyTimes(); + expect(scrapeConfig.isFetchEC2Metadata()).andReturn(false).anyTimes(); + + replayAll(); + testClass.update(); + assertEquals(ImmutableSet.of(), testClass.getAttachedVolumes()); + assertEquals(ImmutableList.of(), testClass.collect()); + verifyAll(); + } } diff --git a/src/test/java/ai/asserts/aws/exporter/ECSTaskUtilTest.java b/src/test/java/ai/asserts/aws/exporter/ECSTaskUtilTest.java index 8fffd21c..727fe3c3 100644 --- a/src/test/java/ai/asserts/aws/exporter/ECSTaskUtilTest.java +++ b/src/test/java/ai/asserts/aws/exporter/ECSTaskUtilTest.java @@ -6,6 +6,7 @@ import ai.asserts.aws.AWSApiCallRateLimiter; import ai.asserts.aws.AWSClientProvider; +import ai.asserts.aws.ScrapeConfigProvider; import ai.asserts.aws.TagUtil; import ai.asserts.aws.TaskExecutorUtil; import ai.asserts.aws.TestTaskThreadPool; @@ -67,6 +68,9 @@ public class ECSTaskUtilTest extends EasyMockSupport { private ScrapeConfig scrapeConfig; private AWSAccount account; private String defaultEnvName; + private AWSClientProvider awsClientProvider; + private Ec2Client ec2Client; + private ScrapeConfigProvider scrapeConfigProvider; @BeforeEach public void setup() { @@ -77,50 +81,47 @@ public void setup() { resourceMapper = mock(ResourceMapper.class); metricCollector = mock(BasicMetricCollector.class); ecsClient = mock(EcsClient.class); - AWSClientProvider awsClientProvider = mock(AWSClientProvider.class); + scrapeConfigProvider = mock(ScrapeConfigProvider.class); + awsClientProvider = mock(AWSClientProvider.class); tagUtil = mock(TagUtil.class); scrapeConfig = mock(ScrapeConfig.class); + ec2Client = mock(Ec2Client.class); + AWSApiCallRateLimiter rateLimiter = new AWSApiCallRateLimiter(metricCollector, (account) -> "acme"); TaskExecutorUtil taskExecutorUtil = new TaskExecutorUtil(new TestTaskThreadPool(), rateLimiter); - Ec2Client ec2Client = mock(Ec2Client.class); defaultEnvName = "dev"; testClass = new ECSTaskUtil(awsClientProvider, resourceMapper, rateLimiter, tagUtil, - taskExecutorUtil) { + taskExecutorUtil, scrapeConfigProvider) { @Override String getInstallEnvName() { return defaultEnvName; } }; - expect(awsClientProvider.getEc2Client(anyString(), anyObject())).andReturn(ec2Client).anyTimes(); - expect(ec2Client.describeSubnets(DescribeSubnetsRequest.builder() - .subnetIds("subnet-id") - .build())).andReturn(DescribeSubnetsResponse.builder() - .subnets(Subnet.builder() - .vpcId("vpc-id") - .build()) - .build()).anyTimes(); - cluster = Resource.builder() + .tenant("acme") .name("cluster") .region("us-west-2") .account("account") .build(); service = Resource.builder() + .tenant("acme") .name("service") .region("us-west-2") .account("account") .build(); task = Resource.builder() + .tenant("acme") .name("task-id") .region("us-west-2") .account("account") .build(); taskDef = Resource.builder() + .tenant("acme") .name("task-def") .version("5") .account("account") @@ -162,6 +163,18 @@ public void getEnvName() { @Test public void containerWithDockerLabels() { + expect(awsClientProvider.getEc2Client(anyString(), anyObject())).andReturn(ec2Client).anyTimes(); + expect(ec2Client.describeSubnets(DescribeSubnetsRequest.builder() + .subnetIds("subnet-id") + .build())).andReturn(DescribeSubnetsResponse.builder() + .subnets(Subnet.builder() + .vpcId("vpc-id") + .build()) + .build()).anyTimes(); + + expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig); + expect(scrapeConfig.isFetchEC2Metadata()).andReturn(true); + expect(resourceMapper.map("task-def-arn")).andReturn(Optional.of(taskDef)); expect(resourceMapper.map("task-arn")).andReturn(Optional.of(task)); @@ -238,8 +251,98 @@ public void containerWithDockerLabels() { verifyAll(); } + @Test + public void containerWithDockerLabels_SkipVPCDiscovery() { + expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig); + expect(scrapeConfig.isFetchEC2Metadata()).andReturn(false); + + expect(resourceMapper.map("task-def-arn")).andReturn(Optional.of(taskDef)); + expect(resourceMapper.map("task-arn")).andReturn(Optional.of(task)); + + ImmutableMap logDriverOptions = ImmutableMap.of( + "awslogs-group", "asserts-aws-integration-Dev", + "awslogs-region", "us-west-2", + "awslogs-stream-prefix", "cloudwatch-exporter" + ); + TaskDefinition taskDefinition = TaskDefinition.builder() + .containerDefinitions(ContainerDefinition.builder() + .name("model-builder") + .image("image") + .dockerLabels(ImmutableMap.of( + PROMETHEUS_METRIC_PATH_DOCKER_LABEL, "/metric/path", + PROMETHEUS_PORT_DOCKER_LABEL, "8080" + )) + .logConfiguration(LogConfiguration.builder() + .logDriver(LogDriver.AWSLOGS) + .options(logDriverOptions) + .build()) + .build()) + .build(); + + expect(ecsClient.describeTaskDefinition(DescribeTaskDefinitionRequest.builder() + .taskDefinition("task-def-arn") + .build())).andReturn(DescribeTaskDefinitionResponse.builder() + .taskDefinition(taskDefinition) + .build()); + metricCollector.recordLatency(eq(SCRAPE_LATENCY_METRIC), anyObject(), anyLong()); + + expect(tagUtil.tagLabels(eq(scrapeConfig), anyObject(List.class))).andReturn(ImmutableMap.of("tag_key", + "tag_value")); + + replayAll(); + List staticConfigs = testClass.buildScrapeTargets(account, scrapeConfig, ecsClient, cluster, + Optional.of(service.getName()), Task.builder() + .taskArn("task-arn") + .taskDefinitionArn("task-def-arn") + .lastStatus("RUNNING") + .attachments(Attachment.builder() + .type(ENI) + .details(KeyValuePair.builder() + .name(PRIVATE_IPv4ADDRESS) + .value("10.20.30.40") + .build(), + KeyValuePair.builder() + .name(SUBNET_ID) + .value("subnet-id") + .build() + ) + .build()) + .build()); + assertEquals(1, staticConfigs.size()); + StaticConfig staticConfig = staticConfigs.get(0); + assertAll( + () -> assertEquals("cluster", staticConfig.getLabels().getCluster()), + () -> assertEquals("model-builder", staticConfig.getLabels().getJob()), + () -> assertEquals("task-def", staticConfig.getLabels().getTaskDefName()), + () -> assertEquals("5", staticConfig.getLabels().getTaskDefVersion()), + () -> assertEquals("service-task-id", staticConfig.getLabels().getPod()), + () -> assertEquals("/metric/path", staticConfig.getLabels().getMetricsPath()), + () -> assertEquals("model-builder", staticConfig.getLabels().getContainer()), + () -> assertEquals("", staticConfig.getLabels().getVpcId()), + () -> assertEquals(ImmutableSet.of("10.20.30.40:8080"), staticConfig.getTargets()), + () -> assertEquals(ImmutableSet.of(ECSServiceDiscoveryExporter.LogConfig.builder() + .logDriver(LogDriver.AWSLOGS.toString()) + .options(logDriverOptions) + .build()), + staticConfig.getLogConfigs()) + ); + verifyAll(); + } + @Test public void containerWithoutDockerLabels() { + expect(awsClientProvider.getEc2Client(anyString(), anyObject())).andReturn(ec2Client).anyTimes(); + expect(ec2Client.describeSubnets(DescribeSubnetsRequest.builder() + .subnetIds("subnet-id") + .build())).andReturn(DescribeSubnetsResponse.builder() + .subnets(Subnet.builder() + .vpcId("vpc-id") + .build()) + .build()).anyTimes(); + + expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig); + expect(scrapeConfig.isFetchEC2Metadata()).andReturn(true); + expect(resourceMapper.map("task-def-arn")).andReturn(Optional.of(taskDef)); expect(resourceMapper.map("task-arn")).andReturn(Optional.of(task));