Skip to content

Commit

Permalink
Add simple latency histogram metrics (#89)
Browse files Browse the repository at this point in the history
* Add simple latency histogram metrics
  • Loading branch information
YongGang authored Feb 28, 2022
1 parent f243762 commit e702f27
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 25 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
java: [ '8', '11' ]
java: ['11']
steps:
- uses: actions/checkout@v2
- name: Set up JDK
Expand Down
39 changes: 16 additions & 23 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
<properties>
<dist.dir>${project.build.directory}/dist</dist.dir>
<jackson.version>2.10.0</jackson.version>
<java.numeric.version>1.8</java.numeric.version>
<java.numeric.version>11</java.numeric.version>
<kafka.version>2.6.2</kafka.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
Expand Down Expand Up @@ -204,23 +204,16 @@
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<version>3.0.5</version>
<configuration>
<effort>Max</effort>
<threshold>Low</threshold>
<failOnError>true</failOnError>
</configuration>
<executions>
<execution>
<id>analyze-compile</id>
<goals>
<goal>check</goal>
</goals>
<phase>compile</phase>
</execution>
</executions>
<groupId>com.github.spotbugs</groupId>
<artifactId>spotbugs-maven-plugin</artifactId>
<version>4.5.2.0</version>
<dependencies>
<dependency>
<groupId>com.github.spotbugs</groupId>
<artifactId>spotbugs</artifactId>
<version>4.5.3</version>
</dependency>
</dependencies>
</plugin>
<plugin>
<groupId>com.github.ekryd.sortpom</groupId>
Expand Down Expand Up @@ -292,7 +285,7 @@
<configuration>
<target>
<chmod dir="${dist.dir}" perm="700">
<include name="**/*.sh"/>
<include name="**/*.sh" />
</chmod>
</target>
</configuration>
Expand Down Expand Up @@ -392,7 +385,7 @@
</goals>
</pluginExecutionFilter>
<action>
<ignore/>
<ignore />
</action>
</pluginExecution>
<pluginExecution>
Expand All @@ -405,7 +398,7 @@
</goals>
</pluginExecutionFilter>
<action>
<ignore/>
<ignore />
</action>
</pluginExecution>
</pluginExecutions>
Expand Down Expand Up @@ -459,10 +452,10 @@
<phase>package</phase>
<configuration>
<target>
<copy file="${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar" tofile="${dist.dir}/mirus.jar"/>
<copy file="${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar" tofile="${dist.dir}/mirus.jar" />
<!-- Grant execute permission to all shell scripts in package dir by setting it to 755 -->
<chmod dir="${dist.dir}" perm="755">
<include name="**/*.sh"/>
<include name="**/*.sh" />
</chmod>
</target>
</configuration>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,25 +1,45 @@
package com.salesforce.mirus.metrics;

import com.google.common.collect.Sets;
import com.salesforce.mirus.MirusSourceConnector;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.kafka.common.MetricNameTemplate;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.metrics.MetricConfig;
import org.apache.kafka.common.metrics.Metrics;
import org.apache.kafka.common.metrics.Sensor;
import org.apache.kafka.common.metrics.stats.*;
import org.apache.kafka.common.utils.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MirrorJmxReporter extends AbstractMirusJmxReporter {

private static final Logger logger = LoggerFactory.getLogger(MirrorJmxReporter.class);

public static final Map<Long, String> LATENCY_BUCKETS =
Map.of(
TimeUnit.MINUTES.toMillis(0),
"0m",
TimeUnit.MINUTES.toMillis(5),
"5m",
TimeUnit.MINUTES.toMillis(10),
"10m",
TimeUnit.MINUTES.toMillis(30),
"30m",
TimeUnit.MINUTES.toMillis(60),
"60m",
TimeUnit.HOURS.toMillis(12),
"12h");

private static MirrorJmxReporter instance = null;

private static final String SOURCE_CONNECTOR_GROUP = MirusSourceConnector.class.getSimpleName();

private static final Set<String> TOPIC_TAGS = new HashSet<>(Collections.singletonList("topic"));
private static final Set<String> TOPIC_BUCKET_TAGS = Sets.newHashSet("topic", "bucket");

private static final MetricNameTemplate REPLICATION_LATENCY =
new MetricNameTemplate(
Expand All @@ -38,16 +58,25 @@ public class MirrorJmxReporter extends AbstractMirusJmxReporter {
"replication-latency-ms-avg", SOURCE_CONNECTOR_GROUP,
"Average time it takes records to replicate from source to target cluster.", TOPIC_TAGS);

protected static final MetricNameTemplate HISTOGRAM_LATENCY =
new MetricNameTemplate(
"replication-latency-histogram",
SOURCE_CONNECTOR_GROUP,
"Cumulative histogram counting records delivered per second with latency exceeding a set of fixed bucket thresholds.",
TOPIC_BUCKET_TAGS);

// Map of topics to their metric objects
private final Map<String, Sensor> topicSensors;
private final Set<TopicPartition> topicPartitionSet;
private final Map<String, TreeMap<Long, Sensor>> histogramLatencySensors;

private MirrorJmxReporter() {
  // Build a Metrics registry with an explicit config, no extra reporters, the system
  // clock, and JMX reporting enabled (the boolean flag).
  super(new Metrics(new MetricConfig(), new ArrayList<>(0), Time.SYSTEM, true));

  // Pre-register the shared replication-latency sensor name on the registry.
  metrics.sensor("replication-latency");

  // Per-topic bookkeeping starts empty; populated later via addTopics().
  this.topicSensors = new HashMap<>();
  this.topicPartitionSet = new HashSet<>();
  this.histogramLatencySensors = new HashMap<>();

  logger.info("Initialized MirrorJMXReporter");
}
Expand All @@ -73,6 +102,15 @@ public synchronized void addTopics(List<TopicPartition> topicPartitions) {
.filter(topic -> !topicSensors.containsKey(topic))
.collect(Collectors.toMap(topic -> topic, this::createTopicSensor)));
topicPartitionSet.addAll(topicPartitions);

for (TopicPartition topicPartition : topicPartitions) {
TreeMap<Long, Sensor> bucketSensors = new TreeMap<>();
String topic = topicPartition.topic();
LATENCY_BUCKETS.forEach(
(edgeMillis, bucketName) ->
bucketSensors.put(edgeMillis, createHistogramSensor(topic, bucketName)));
histogramLatencySensors.put(topic, bucketSensors);
}
}

/**
Expand Down Expand Up @@ -104,6 +142,7 @@ public synchronized void removeTopics(List<TopicPartition> topicPartitions) {
topic -> {
metrics.removeSensor(replicationLatencySensorName(topic));
topicSensors.remove(topic);
histogramLatencySensors.remove(topic);
});
}

Expand All @@ -112,6 +151,24 @@ public synchronized void recordMirrorLatency(String topic, long millis) {
if (sensor != null) {
sensor.record((double) millis);
}

TreeMap<Long, Sensor> bucketSensors = histogramLatencySensors.get(topic);
for (Map.Entry<Long, Sensor> sensorEntry : bucketSensors.entrySet()) {
long edgeMillis = sensorEntry.getKey();
Sensor bucketSensor = sensorEntry.getValue();
if (millis >= edgeMillis) {
if (bucketSensor.hasExpired()) {
String bucket = LATENCY_BUCKETS.get(edgeMillis);
// explicitly replace the expired sensor with a new one
metrics.removeSensor(histogramLatencySensorName(topic, bucket));
bucketSensor = createHistogramSensor(topic, bucket);
}
bucketSensor.record(1);
} else {
// bucket sensors are sorted by edgeMillis
break;
}
}
}

private Sensor createTopicSensor(String topic) {
Expand All @@ -127,7 +184,32 @@ private Sensor createTopicSensor(String topic) {
return sensor;
}

/**
 * Creates and registers the cumulative-histogram sensor for one (topic, bucket) pair.
 *
 * @param topic the source topic being mirrored
 * @param bucket human-readable bucket label (e.g. "5m", "12h")
 * @return the newly registered sensor reporting a per-second rate for this bucket
 */
private Sensor createHistogramSensor(String topic, String bucket) {
  // LinkedHashMap keeps tag insertion order, so the resulting metric name is stable.
  final Map<String, String> sensorTags = new LinkedHashMap<>();
  sensorTags.put("topic", topic);
  sensorTags.put("bucket", bucket);

  // Expire the sensor after 5 idle minutes so inactive buckets stop reporting
  // zero-valued metrics; callers re-create an expired sensor on demand.
  final long inactiveExpirationSeconds = TimeUnit.MINUTES.toSeconds(5);
  final Sensor bucketSensor =
      metrics.sensor(
          histogramLatencySensorName(topic, bucket),
          null,
          inactiveExpirationSeconds,
          Sensor.RecordingLevel.INFO,
          null);

  // Each record(1) call contributes to a windowed per-second rate for this bucket.
  bucketSensor.add(
      metrics.metricInstance(HISTOGRAM_LATENCY, sensorTags),
      new Rate(TimeUnit.SECONDS, new WindowedSum()));

  return bucketSensor;
}

/** Returns the registry sensor name for a topic's latency sensor: "&lt;topic&gt;-replication-latency". */
private String replicationLatencySensorName(String topic) {
  return String.join("-", topic, "replication-latency");
}

/** Returns the registry sensor name for one histogram bucket: "&lt;topic&gt;-&lt;bucket&gt;-histogram-latency". */
private String histogramLatencySensorName(String topic, String bucket) {
  return String.join("-", topic, bucket, "histogram-latency");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package com.salesforce.mirus.metrics;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.metrics.Metrics;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

public class MirrorJmxReporterTest {

  // Constant: static final + UPPER_SNAKE_CASE per Java convention.
  private static final String TEST_TOPIC = "TestTopic";

  private MirrorJmxReporter mirrorJmxReporter;
  private Metrics metrics;

  @Before
  public void setUp() throws Exception {
    mirrorJmxReporter = MirrorJmxReporter.getInstance();
    metrics = mirrorJmxReporter.metrics;
  }

  /**
   * Looks up the current value of the replication-latency histogram metric for the
   * given bucket label on {@code TEST_TOPIC}. Extracted to avoid duplicating the
   * metric-name construction for every bucket assertion.
   */
  private double histogramValue(String bucket) {
    Map<String, String> tags = new LinkedHashMap<>();
    tags.put("topic", TEST_TOPIC);
    tags.put("bucket", bucket);
    Object value =
        metrics
            .metrics()
            .get(
                metrics.metricName(
                    MirrorJmxReporter.HISTOGRAM_LATENCY.name(),
                    MirrorJmxReporter.HISTOGRAM_LATENCY.group(),
                    MirrorJmxReporter.HISTOGRAM_LATENCY.description(),
                    tags))
            .metricValue();
    return (double) value;
  }

  @Test
  public void updateLatencyMetrics() {
    TopicPartition topicPartition = new TopicPartition(TEST_TOPIC, 1);
    mirrorJmxReporter.addTopics(List.of(topicPartition));

    mirrorJmxReporter.recordMirrorLatency(TEST_TOPIC, 500);

    // 500 ms exceeds the 0-minute bucket edge, so the "0m" bucket must have recorded it.
    Assert.assertTrue("0m bucket should record a 500ms latency", histogramValue("0m") > 0);

    // No event reached the 12-hour edge, so that bucket must still read zero.
    // Use assertEquals with a delta rather than == on doubles.
    Assert.assertEquals(0.0, histogramValue("12h"), 0.0);
  }
}

0 comments on commit e702f27

Please sign in to comment.