Skip to content

Commit

Permalink
[sc] Split status and service check status
Browse files Browse the repository at this point in the history
We were sending a service check warning for integrations with too many
metrics which is not the behaviour we'd like to have. Even though there
are too many metrics, the `can_connect` service check should be green.

This was a good oppurtunity for a slight refactoring of `App.java` where
service checks and statuses in the `datadog-agent info` page have been
made completely independant.
  • Loading branch information
Etienne LAFARGE committed Sep 8, 2015
1 parent 3044e39 commit 1e4e269
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 13 deletions.
37 changes: 26 additions & 11 deletions src/main/java/org/datadog/jmxfetch/App.java
Original file line number Diff line number Diff line change
Expand Up @@ -173,21 +173,24 @@ public void doIteration() {
Instance instance = it.next();
LinkedList<HashMap<String, Object>> metrics;
String instanceStatus = Status.STATUS_OK;
String scStatus = Status.STATUS_OK;
String instanceMessage = null;
try {
metrics = instance.getMetrics();
} catch (IOException e) {
String warning = "Unable to refresh bean list for instance " + instance;
LOGGER.warn(warning, e);
reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR);
brokenInstances.add(instance);
continue;
}

if (metrics.size() == 0) {
String warning = "Instance " + instance + " didn't return any metrics";
LOGGER.warn(warning);
reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR);
brokenInstances.add(instance);
continue;
} else if (instance.isLimitReached()) {
Expand All @@ -201,8 +204,8 @@ public void doIteration() {
CustomLogger.laconic(LOGGER, Level.WARN, instanceMessage, 0);
}
reporter.sendMetrics(metrics, instance.getName());

reportStatus(appConfig, reporter, instance, metrics.size(), instanceMessage, instanceStatus);
this.reportStatus(appConfig, reporter, instance, metrics.size(), instanceMessage, instanceStatus);
this.sendServiceCheck(reporter, instance, instanceMessage, scStatus);
}


Expand Down Expand Up @@ -233,19 +236,23 @@ public void doIteration() {
} catch (IOException e) {
String warning = "Cannot connect to instance " + instance + ". Is a JMX Server running at this address?";
LOGGER.warn(warning);
reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR);
} catch (SecurityException e) {
String warning = "Cannot connect to instance " + instance + " because of bad credentials. Please check your credentials";
LOGGER.warn(warning);
reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR);
} catch (FailedLoginException e) {
String warning = "Cannot connect to instance " + instance + " because of bad credentials. Please check your credentials";
LOGGER.warn(warning);
reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR);
} catch (Exception e) {
String warning = "Cannot connect to instance " + instance + " for an unknown reason." + e.getMessage();
LOGGER.fatal(warning, e);
reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR);
}
}

Expand Down Expand Up @@ -290,11 +297,17 @@ private HashMap<String, YamlParser> getConfigs(AppConfig config) {
private void reportStatus(AppConfig appConfig, Reporter reporter, Instance instance,
int metricCount, String message, String status) {
String checkName = instance.getCheckName();
reporter.sendServiceCheck(checkName, status, message, instance.getServiceCheckTags());

appConfig.getStatus().addInstanceStats(checkName, instance.getName(),
metricCount, reporter.getServiceCheckCount(checkName),
message, status);
}

private void sendServiceCheck(Reporter reporter, Instance instance, String message,
String status) {
String checkName = instance.getCheckName();

reporter.sendServiceCheck(checkName, status, message, instance.getServiceCheckTags());
reporter.resetServiceCheckCount(checkName);
}

Expand Down Expand Up @@ -339,13 +352,15 @@ public void init(boolean forceNewConnection) {
instance.cleanUp();
brokenInstances.add(instance);
String warning = "Cannot connect to instance " + instance + " " + e.getMessage();
reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR);
LOGGER.error(warning);
} catch (Exception e) {
instance.cleanUp();
brokenInstances.add(instance);
String warning = "Unexpected exception while initiating instance " + instance + " : " + e.getMessage();
reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.reportStatus(appConfig, reporter, instance, 0, warning, Status.STATUS_ERROR);
this.sendServiceCheck(reporter, instance, warning, Status.STATUS_ERROR);
LOGGER.error(warning, e);
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/org/datadog/jmxfetch/TestServiceChecks.java
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ public void testServiceCheckWarning() throws Exception {
String[] scTags = (String[]) (sc.get("tags"));

assertEquals(Reporter.formatServiceCheckPrefix("too_many_metrics"), scName);
// We should have a warning status
assertEquals(Status.STATUS_WARNING, scStatus);
// We should have an OK service check status when too many metrics are getting sent
assertEquals(Status.STATUS_OK, scStatus);
assertEquals(scTags.length, 1);
assertTrue(Arrays.asList(scTags).contains("instance:jmx_test_instance"));
mbs.unregisterMBean(objectName);
Expand Down

0 comments on commit 1e4e269

Please sign in to comment.