Skip to content

Commit

Permalink
Merge pull request #315 from asserts/radha/sc-16157-fix-some-more-issues
Browse files Browse the repository at this point in the history
Radha/sc 16157 fix some more issues
  • Loading branch information
jradhakrishnan authored Aug 30, 2023
2 parents 28b1db4 + 3b16861 commit e39bc80
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 50 deletions.
17 changes: 12 additions & 5 deletions src/main/java/ai/asserts/aws/AWSApiCallRateLimiter.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.util.concurrent.Callable;

import static ai.asserts.aws.MetricNameUtil.ASSERTS_ERROR_TYPE;
import static ai.asserts.aws.MetricNameUtil.ASSERTS_CUSTOMER;
import static ai.asserts.aws.MetricNameUtil.SCRAPE_ACCOUNT_ID_LABEL;
import static ai.asserts.aws.MetricNameUtil.SCRAPE_ERROR_COUNT_METRIC;
import static ai.asserts.aws.MetricNameUtil.SCRAPE_LATENCY_METRIC;
Expand Down Expand Up @@ -71,21 +72,27 @@ public <K extends AWSAPICall<V>, V> V doWithRateLimit(String api, SortedMap<Stri
tick = System.currentTimeMillis();
return k.makeCall();
} catch (Throwable e) {
log.error("Exception", e);
log.error("Exception in: " + regionKey, e);
SortedMap<String, String> errorLabels = new TreeMap<>(labels);
errorLabels.put(ASSERTS_ERROR_TYPE, e.getClass().getSimpleName());

// In SaaS mode, we don't want the exporter internal metrics to end up in the tenant's TSDB
errorLabels.remove(TENANT);
if (tenantName != null) {
errorLabels.put(TENANT, tenantName);
errorLabels.put(ASSERTS_CUSTOMER, tenantName);
}
metricCollector.recordCounterValue(SCRAPE_ERROR_COUNT_METRIC, errorLabels, 1);
throw new RuntimeException(e);
} finally {
tick = System.currentTimeMillis() - tick;
labels = new TreeMap<>(labels);

// In SaaS mode, we don't want the exporter internal metrics to end up in the tenant's TSDB
SortedMap<String, String> latencyLabels = new TreeMap<>(labels);
latencyLabels.remove(TENANT);
if (tenantName != null) {
labels.put(TENANT, tenantName);
latencyLabels.put(ASSERTS_CUSTOMER, tenantName);
}
metricCollector.recordLatency(SCRAPE_LATENCY_METRIC, labels, tick);
metricCollector.recordLatency(SCRAPE_LATENCY_METRIC, latencyLabels, tick);
}
}

Expand Down
1 change: 1 addition & 0 deletions src/main/java/ai/asserts/aws/MetricNameUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ public class MetricNameUtil {
public static final String SCRAPE_LATENCY_METRIC = "aws_exporter_milliseconds";
public static final String ASSERTS_ERROR_TYPE = "asserts_error_type";
public static final String TENANT = "tenant";
public static final String ASSERTS_CUSTOMER = "asserts_customer";
public static final String ENV = "asserts_env";
public static final String SITE = "asserts_site";
public static final String SCRAPE_ERROR_COUNT_METRIC = "aws_exporter_error_total";
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/ai/asserts/aws/exporter/ECSTaskProvider.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import static ai.asserts.aws.MetricNameUtil.SCRAPE_NAMESPACE_LABEL;
import static ai.asserts.aws.MetricNameUtil.SCRAPE_OPERATION_LABEL;
import static ai.asserts.aws.MetricNameUtil.SCRAPE_REGION_LABEL;
import static ai.asserts.aws.MetricNameUtil.TENANT;

/**
* Builds ECS Task scrape targets. Scraping ECS is best done using the ECS Sidecar.
Expand Down Expand Up @@ -111,6 +112,7 @@ public List<MetricFamilySamples> collect() {

target.getLogConfigs().forEach(logConfig -> {
Map<String, String> logInfoLabels = new TreeMap<>();
logInfoLabels.put(TENANT, labels.getTenant());
logInfoLabels.put(SCRAPE_ACCOUNT_ID_LABEL, labels.getAccountId());
logInfoLabels.put(SCRAPE_REGION_LABEL, labels.getRegion());
logInfoLabels.put("cluster", labels.getCluster());
Expand Down
76 changes: 39 additions & 37 deletions src/main/java/ai/asserts/aws/exporter/LBToLambdaRoutingBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
*/
package ai.asserts.aws.exporter;

import ai.asserts.aws.AWSClientProvider;
import ai.asserts.aws.AWSApiCallRateLimiter;
import ai.asserts.aws.AWSClientProvider;
import ai.asserts.aws.SimpleTenantTask;
import ai.asserts.aws.TaskExecutorUtil;
import ai.asserts.aws.account.AWSAccount;
Expand Down Expand Up @@ -80,42 +80,44 @@ private Pair<Set<ResourceRelation>, Set<Resource>> buildRelations(String region,
try {
ElasticLoadBalancingV2Client elbV2Client = awsClientProvider.getELBV2Client(region, accountRegion);
Map<Resource, Resource> tgToLB = targetGroupLBMapProvider.getTgToLB();
tgToLB.keySet().forEach(tg -> {
try {
String api = "ElasticLoadBalancingV2Client/describeTargetHealth";
DescribeTargetHealthResponse response = rateLimiter.doWithRateLimit(
api,
ImmutableSortedMap.of(
SCRAPE_REGION_LABEL, region,
SCRAPE_ACCOUNT_ID_LABEL, accountRegion.getAccountId(),
SCRAPE_OPERATION_LABEL, api
)
, () -> elbV2Client.describeTargetHealth(DescribeTargetHealthRequest.builder()
.targetGroupArn(tg.getArn())
.build()));
if (!isEmpty(response.targetHealthDescriptions())) {
response.targetHealthDescriptions().stream()
.map(tH -> resourceMapper.map(tH.target().id()))
.filter(opt -> opt.isPresent() && opt.get().getType().equals(LambdaFunction))
.map(Optional::get)
.forEach(lambda -> routing.add(ResourceRelation.builder()
.from(tgToLB.get(tg))
.to(lambda)
.name("ROUTES_TO")
.build()));
}
} catch (TargetGroupNotFoundException e) {
log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg);
missingTgs.add(tg);
} catch (Exception e) {
if (e.getCause() instanceof TargetGroupNotFoundException) {
log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg);
missingTgs.add(tg);
} else {
log.error("Failed to build resource relations", e);
}
}
});
tgToLB.keySet().stream()
.filter(tg -> tg.getAccount().equals(accountRegion.getAccountId()) && region.equals(tg.getRegion()))
.forEach(tg -> {
try {
String api = "ElasticLoadBalancingV2Client/describeTargetHealth";
DescribeTargetHealthResponse response = rateLimiter.doWithRateLimit(
api,
ImmutableSortedMap.of(
SCRAPE_REGION_LABEL, region,
SCRAPE_ACCOUNT_ID_LABEL, accountRegion.getAccountId(),
SCRAPE_OPERATION_LABEL, api
)
, () -> elbV2Client.describeTargetHealth(DescribeTargetHealthRequest.builder()
.targetGroupArn(tg.getArn())
.build()));
if (!isEmpty(response.targetHealthDescriptions())) {
response.targetHealthDescriptions().stream()
.map(tH -> resourceMapper.map(tH.target().id()))
.filter(opt -> opt.isPresent() && opt.get().getType().equals(LambdaFunction))
.map(Optional::get)
.forEach(lambda -> routing.add(ResourceRelation.builder()
.from(tgToLB.get(tg))
.to(lambda)
.name("ROUTES_TO")
.build()));
}
} catch (TargetGroupNotFoundException e) {
log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg);
missingTgs.add(tg);
} catch (Exception e) {
if (e.getCause() instanceof TargetGroupNotFoundException) {
log.warn("LoadBalancer-2-TargetGroup Cache refers to non-existent TargetGroup {}", tg);
missingTgs.add(tg);
} else {
log.error("Failed to build resource relations", e);
}
}
});
} catch (Exception e) {
log.error("Error " + accountRegion, e);
}
Expand Down
12 changes: 7 additions & 5 deletions src/main/java/ai/asserts/aws/exporter/MetricSampleBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,13 @@ public Optional<Sample> buildSingleSample(String metricName, Map<String, String>
Double metric) {
labels = new TreeMap<>(labels);
AWSAccount accountDetails = taskExecutorUtil.getAccountDetails();
labels.putIfAbsent(TENANT, accountDetails.getTenant());
if (hasLength(accountDetails.getName())) {
labels.putIfAbsent(ENV, accountDetails.getName());
} else {
labels.putIfAbsent(ENV, accountDetails.getAccountId());
if (accountDetails != null) {
labels.putIfAbsent(TENANT, accountDetails.getTenant());
if (hasLength(accountDetails.getName())) {
labels.putIfAbsent(ENV, accountDetails.getName());
} else {
labels.putIfAbsent(ENV, accountDetails.getAccountId());
}
}
if (labels.containsKey(SCRAPE_REGION_LABEL)) {
labels.putIfAbsent(SITE, labels.get(SCRAPE_REGION_LABEL));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public void setup() {
labels = new TreeMap<>();
labels.put("account_id", "account");
labels.put("region", "region");
labels.put("tenant", "acme");
labels.put("asserts_customer", "acme");
rateLimiter = new AWSApiCallRateLimiter(metricCollector,
(accountId) -> "acme", 1.0D);
}
Expand Down
14 changes: 13 additions & 1 deletion src/test/java/ai/asserts/aws/exporter/ECSTaskProviderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ public void afterPropertiesSet() throws Exception {
@Test
public void collect() throws Exception {
Resource cluster1 = Resource.builder()
.tenant("acme")
.name("cluster1")
.region("region")
.account("account1")
Expand All @@ -124,6 +125,7 @@ public void collect() throws Exception {


Resource task1 = Resource.builder()
.tenant("acme")
.name("task1")
.build();

Expand All @@ -143,6 +145,7 @@ public void collect() throws Exception {
tasksByCluster.put(cluster1, ImmutableMap.of(task1, ImmutableList.of(mockStaticConfig, mockStaticConfig)));

Labels labels1 = Labels.builder()
.tenant("acme")
.accountId("account1")
.region("us-west-2")
.cluster("cluster")
Expand All @@ -162,6 +165,7 @@ public void collect() throws Exception {

expect(sampleBuilder.buildSingleSample(CONTAINER_LOG_INFO_METRIC,
ImmutableSortedMap.<String, String>naturalOrder()
.put("tenant", "acme")
.put("account_id", "account1")
.put("region", "us-west-2")
.put("cluster", "cluster")
Expand All @@ -174,6 +178,7 @@ public void collect() throws Exception {
.build(), 1.0D)).andReturn(Optional.of(mockSample));

Labels labels2 = Labels.builder()
.tenant("acme")
.accountId("account1")
.region("us-west-2")
.cluster("cluster")
Expand All @@ -192,6 +197,7 @@ public void collect() throws Exception {
.build()));
expect(sampleBuilder.buildSingleSample(CONTAINER_LOG_INFO_METRIC,
ImmutableSortedMap.<String, String>naturalOrder()
.put("tenant", "acme")
.put("account_id", "account1")
.put("region", "us-west-2")
.put("cluster", "cluster")
Expand Down Expand Up @@ -226,19 +232,22 @@ public void getService() {
@Test
public void getScrapeTargets() {
AWSAccount awsAccount = AWSAccount.builder()
.tenant("acme")
.accountId("account1")
.regions(ImmutableSet.of("region"))
.name("test-account")
.build();

Resource cluster1 = Resource.builder()
.tenant("acme")
.name("cluster1")
.region("region")
.account("account1")
.arn("cluster1-arn")
.build();

Resource task1 = Resource.builder()
.tenant("acme")
.name("task1")
.build();

Expand All @@ -265,7 +274,7 @@ void buildNewTargets(AWSAccount _account, ScrapeConfig _scrapeConfig,

testClass.getTasksByCluster().put(cluster1, ImmutableMap.of(task1, ImmutableList.of(mockStaticConfig)));

expect(scrapeConfigProvider.getScrapeConfig(null)).andReturn(scrapeConfig);
expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig);

expect(accountProvider.getAccounts()).andReturn(ImmutableSet.of(awsAccount));
expect(awsClientProvider.getECSClient("region", awsAccount)).andReturn(ecsClient);
Expand All @@ -279,6 +288,7 @@ void buildNewTargets(AWSAccount _account, ScrapeConfig _scrapeConfig,
@Test
public void discoverNewTargets() {
Resource cluster1 = Resource.builder()
.tenant("acme")
.name("cluster1")
.region("region")
.account("account1")
Expand Down Expand Up @@ -338,6 +348,7 @@ public void discoverNewTargets() {
public void buildNewTargets() {
expect(scrapeConfigProvider.getScrapeConfig("acme")).andReturn(scrapeConfig).anyTimes();
Resource cluster1 = Resource.builder()
.tenant("acme")
.name("cluster1")
.region("region")
.account("account1")
Expand All @@ -347,6 +358,7 @@ public void buildNewTargets() {
Resource service2 = Resource.builder().arn("service2-arn").build();

Resource cluster2 = Resource.builder()
.tenant("acme")
.name("cluster2")
.region("region")
.account("account1")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public void setup() {
new TaskExecutorUtil(new TestTaskThreadPool(),
new AWSApiCallRateLimiter(metricCollector, (account) -> "acme")));

AWSAccount awsAccount = new AWSAccount("tenant", "account", "accessId", "secretKey", "role",
AWSAccount awsAccount = new AWSAccount("acme", "account", "accessId", "secretKey", "role",
ImmutableSet.of("region"));
expect(accountProvider.getAccounts()).andReturn(ImmutableSet.of(awsAccount)).anyTimes();
expect(awsClientProvider.getELBV2Client("region", awsAccount)).andReturn(elbV2Client).anyTimes();
Expand All @@ -80,7 +80,11 @@ public void setup() {

@Test
void getRoutings() {
expect(targetGroupResource.getAccount()).andReturn("account");
expect(targetGroupResource.getRegion()).andReturn("region");
expect(targetGroupResource.getArn()).andReturn("tg-arn");
expect(targetGroupResource2.getAccount()).andReturn("account");
expect(targetGroupResource2.getRegion()).andReturn("region");
expect(targetGroupResource2.getArn()).andReturn("tg-arn2");
expect(elbV2Client.describeTargetHealth(DescribeTargetHealthRequest.builder()
.targetGroupArn("tg-arn")
Expand Down

0 comments on commit e39bc80

Please sign in to comment.