salesforce · YongGang · Feb 28, 2022 · Feb 2, 2022 · Feb 2, 2022 · Feb 5, 2022
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -16,7 +16,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        java: [ '8', '11' ]
+        java: ['11']
     steps:
     - uses: actions/checkout@v2    
     - name: Set up JDK

diff --git a/pom.xml b/pom.xml
@@ -12,7 +12,7 @@
     <properties>
         <dist.dir>${project.build.directory}/dist</dist.dir>
         <jackson.version>2.10.0</jackson.version>
-        <java.numeric.version>1.8</java.numeric.version>
+        <java.numeric.version>11</java.numeric.version>
         <kafka.version>2.6.2</kafka.version>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     </properties>
@@ -204,23 +204,16 @@
                 </executions>
             </plugin>
             <plugin>
-                <groupId>org.codehaus.mojo</groupId>
-                <artifactId>findbugs-maven-plugin</artifactId>
-                <version>3.0.5</version>
-                <configuration>
-                    <effort>Max</effort>
-                    <threshold>Low</threshold>
-                    <failOnError>true</failOnError>
-                </configuration>
-                <executions>
-                    <execution>
-                        <id>analyze-compile</id>
-                        <goals>
-                            <goal>check</goal>
-                        </goals>
-                        <phase>compile</phase>
-                    </execution>
-                </executions>
+                <groupId>com.github.spotbugs</groupId>
+                <artifactId>spotbugs-maven-plugin</artifactId>
+                <version>4.5.2.0</version>
+                <dependencies>
+                    <dependency>
+                        <groupId>com.github.spotbugs</groupId>
+                        <artifactId>spotbugs</artifactId>
+                        <version>4.5.3</version>
+                    </dependency>
+                </dependencies>
             </plugin>
             <plugin>
                 <groupId>com.github.ekryd.sortpom</groupId>
@@ -292,7 +285,7 @@
                         <configuration>
                             <target>
                                 <chmod dir="${dist.dir}" perm="700">
-                                    <include name="**/*.sh"/>
+                                    <include name="**/*.sh" />
                                 </chmod>
                             </target>
                         </configuration>
@@ -392,7 +385,7 @@
                                                 </goals>
                                             </pluginExecutionFilter>
                                             <action>
-                                                <ignore/>
+                                                <ignore />
                                             </action>
                                         </pluginExecution>
                                         <pluginExecution>
@@ -405,7 +398,7 @@
                                                 </goals>
                                             </pluginExecutionFilter>
                                             <action>
-                                                <ignore/>
+                                                <ignore />
                                             </action>
                                         </pluginExecution>
                                     </pluginExecutions>
@@ -459,10 +452,10 @@
                                 <phase>package</phase>
                                 <configuration>
                                     <target>
-                                        <copy file="${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar" tofile="${dist.dir}/mirus.jar"/>
+                                        <copy file="${project.build.directory}/${project.artifactId}-${project.version}-shaded.jar" tofile="${dist.dir}/mirus.jar" />
                                         <!-- Grant execute permission to all shell scripts in package dir by setting it to 755 -->
                                         <chmod dir="${dist.dir}" perm="755">
-                                            <include name="**/*.sh"/>
+                                            <include name="**/*.sh" />
                                         </chmod>
                                     </target>
                                 </configuration>

diff --git a/src/main/java/com/salesforce/mirus/metrics/MirrorJmxReporter.java b/src/main/java/com/salesforce/mirus/metrics/MirrorJmxReporter.java
@@ -1,25 +1,45 @@
 package com.salesforce.mirus.metrics;
 
+import com.google.common.collect.Sets;
 import com.salesforce.mirus.MirusSourceConnector;
 import java.util.*;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 import org.apache.kafka.common.MetricNameTemplate;
 import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.metrics.MetricConfig;
 import org.apache.kafka.common.metrics.Metrics;
 import org.apache.kafka.common.metrics.Sensor;
 import org.apache.kafka.common.metrics.stats.*;
+import org.apache.kafka.common.utils.Time;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 public class MirrorJmxReporter extends AbstractMirusJmxReporter {
 
   private static final Logger logger = LoggerFactory.getLogger(MirrorJmxReporter.class);
 
+  public static final Map<Long, String> LATENCY_BUCKETS =
+      Map.of(
+          TimeUnit.MINUTES.toMillis(0),
+          "0m",
+          TimeUnit.MINUTES.toMillis(5),
+          "5m",
+          TimeUnit.MINUTES.toMillis(10),
+          "10m",
+          TimeUnit.MINUTES.toMillis(30),
+          "30m",
+          TimeUnit.MINUTES.toMillis(60),
+          "60m",
+          TimeUnit.HOURS.toMillis(12),
+          "12h");
+
   private static MirrorJmxReporter instance = null;
 
   private static final String SOURCE_CONNECTOR_GROUP = MirusSourceConnector.class.getSimpleName();
 
   private static final Set<String> TOPIC_TAGS = new HashSet<>(Collections.singletonList("topic"));
+  private static final Set<String> TOPIC_BUCKET_TAGS = Sets.newHashSet("topic", "bucket");
 
   private static final MetricNameTemplate REPLICATION_LATENCY =
       new MetricNameTemplate(
@@ -38,16 +58,25 @@ public class MirrorJmxReporter extends AbstractMirusJmxReporter {
           "replication-latency-ms-avg", SOURCE_CONNECTOR_GROUP,
           "Average time it takes records to replicate from source to target cluster.", TOPIC_TAGS);
 
+  protected static final MetricNameTemplate HISTOGRAM_LATENCY =
+      new MetricNameTemplate(
+          "replication-latency-histogram",
+          SOURCE_CONNECTOR_GROUP,
+          "Cumulative histogram counting records delivered per second with latency exceeding a set of fixed bucket thresholds.",
+          TOPIC_BUCKET_TAGS);
+
   // Map of topics to their metric objects
   private final Map<String, Sensor> topicSensors;
   private final Set<TopicPartition> topicPartitionSet;
+  private final Map<String, TreeMap<Long, Sensor>> histogramLatencySensors;
 
   private MirrorJmxReporter() {
-    super(new Metrics());
+    super(new Metrics(new MetricConfig(), new ArrayList<>(0), Time.SYSTEM, true));
     metrics.sensor("replication-latency");
 
     topicSensors = new HashMap<>();
     topicPartitionSet = new HashSet<>();
+    histogramLatencySensors = new HashMap<>();
 
     logger.info("Initialized MirrorJMXReporter");
   }
@@ -73,6 +102,15 @@ public synchronized void addTopics(List<TopicPartition> topicPartitions) {
             .filter(topic -> !topicSensors.containsKey(topic))
             .collect(Collectors.toMap(topic -> topic, this::createTopicSensor)));
     topicPartitionSet.addAll(topicPartitions);
+
+    for (TopicPartition topicPartition : topicPartitions) {
+      TreeMap<Long, Sensor> bucketSensors = new TreeMap<>();
+      String topic = topicPartition.topic();
+      LATENCY_BUCKETS.forEach(
+          (edgeMillis, bucketName) ->
+              bucketSensors.put(edgeMillis, createHistogramSensor(topic, bucketName)));
+      histogramLatencySensors.put(topic, bucketSensors);
+    }
   }
 
   /**
@@ -104,6 +142,7 @@ public synchronized void removeTopics(List<TopicPartition> topicPartitions) {
         topic -> {
           metrics.removeSensor(replicationLatencySensorName(topic));
           topicSensors.remove(topic);
+          histogramLatencySensors.remove(topic);
         });
   }
 
@@ -112,6 +151,24 @@ public synchronized void recordMirrorLatency(String topic, long millis) {
     if (sensor != null) {
       sensor.record((double) millis);
     }
+
+    TreeMap<Long, Sensor> bucketSensors = histogramLatencySensors.get(topic);
+    for (Map.Entry<Long, Sensor> sensorEntry : bucketSensors.entrySet()) {
+      long edgeMillis = sensorEntry.getKey();
+      Sensor bucketSensor = sensorEntry.getValue();
+      if (millis >= edgeMillis) {
+        if (bucketSensor.hasExpired()) {
+          String bucket = LATENCY_BUCKETS.get(edgeMillis);
+          // explicitly replace the expired sensor with a new one
+          metrics.removeSensor(histogramLatencySensorName(topic, bucket));
+          bucketSensor = createHistogramSensor(topic, bucket);
+        }
+        bucketSensor.record(1);
+      } else {
+        // bucket sensors are sorted by edgeMillis
+        break;
+      }
+    }
   }
 
   private Sensor createTopicSensor(String topic) {
@@ -127,7 +184,32 @@ private Sensor createTopicSensor(String topic) {
     return sensor;
   }
 
+  private Sensor createHistogramSensor(String topic, String bucket) {
+    Map<String, String> tags = new LinkedHashMap<>();
+    tags.put("topic", topic);
+    tags.put("bucket", bucket);
+
+    // bucket sensor will be expired after 5 mins if inactive
+    // this is to prevent inactive bucket sensors from reporting too many zero value metrics
+    Sensor sensor =
+        metrics.sensor(
+            histogramLatencySensorName(topic, bucket),
+            null,
+            TimeUnit.MINUTES.toSeconds(5),
+            Sensor.RecordingLevel.INFO,
+            null);
+    sensor.add(
+        metrics.metricInstance(HISTOGRAM_LATENCY, tags),
+        new Rate(TimeUnit.SECONDS, new WindowedSum()));
+
+    return sensor;
+  }
+
   private String replicationLatencySensorName(String topic) {
     return topic + "-" + "replication-latency";
   }
+
+  private String histogramLatencySensorName(String topic, String bucket) {
+    return topic + "-" + bucket + "-" + "histogram-latency";
+  }
 }
diff --git a/src/test/java/com/salesforce/mirus/metrics/MirrorJmxReporterTest.java b/src/test/java/com/salesforce/mirus/metrics/MirrorJmxReporterTest.java
@@ -0,0 +1,59 @@
+package com.salesforce.mirus.metrics;
+
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.kafka.common.metrics.Metrics;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MirrorJmxReporterTest {
+
+  private MirrorJmxReporter mirrorJmxReporter;
+  private Metrics metrics;
+  private final String TEST_TOPIC = "TestTopic";
+
+  @Before
+  public void setUp() throws Exception {
+    mirrorJmxReporter = MirrorJmxReporter.getInstance();
+    metrics = mirrorJmxReporter.metrics;
+  }
+
+  @Test
+  public void updateLatencyMetrics() {
+    TopicPartition topicPartition = new TopicPartition(TEST_TOPIC, 1);
+    mirrorJmxReporter.addTopics(List.of(topicPartition));
+
+    mirrorJmxReporter.recordMirrorLatency(TEST_TOPIC, 500);
+
+    Map<String, String> tags = new LinkedHashMap<>();
+    tags.put("topic", TEST_TOPIC);
+    tags.put("bucket", "0m");
+    Object value =
+        metrics
+            .metrics()
+            .get(
+                metrics.metricName(
+                    MirrorJmxReporter.HISTOGRAM_LATENCY.name(),
+                    MirrorJmxReporter.HISTOGRAM_LATENCY.group(),
+                    MirrorJmxReporter.HISTOGRAM_LATENCY.description(),
+                    tags))
+            .metricValue();
+    Assert.assertTrue((double) value > 0);
+
+    tags.put("bucket", "12h");
+    value =
+        metrics
+            .metrics()
+            .get(
+                metrics.metricName(
+                    MirrorJmxReporter.HISTOGRAM_LATENCY.name(),
+                    MirrorJmxReporter.HISTOGRAM_LATENCY.group(),
+                    MirrorJmxReporter.HISTOGRAM_LATENCY.description(),
+                    tags))
+            .metricValue();
+    Assert.assertTrue((double) value == 0);
+  }
+}