
feat(mcl-processor): Update mcl processor hooks #11134

Merged

21 changes: 21 additions & 0 deletions docs/how/kafka-config.md
@@ -116,6 +116,27 @@ We've included an environment variable to customize the consumer group id, if yo

- `KAFKA_CONSUMER_GROUP_ID`: The name of the kafka consumer's group id.

#### datahub-mae-consumer MCL Hooks

By default, all MetadataChangeLog processing hooks execute as part of the same kafka consumer group based on the
previously mentioned `KAFKA_CONSUMER_GROUP_ID`.

The various MCL Hooks could alsp be separated into separate groups which allows for controlling parallelization and
prioritization of the hooks.
Comment on lines +124 to +125
Contributor

Correct the typo and improve clarity.

There's a typo in "alsp" which should be "also." Additionally, a comma is needed for clarity.

- The various MCL Hooks could alsp be separated into separate groups which allows for controlling parallelization and 
+ The various MCL Hooks could also be separated into separate groups, which allows for controlling parallelization and 

For example, `UpdateIndicesHook` and `SiblingsHook` processing can be delayed by other hooks. Separating these
hooks into their own group reduces the latency those other hooks introduce. The `application.yaml` configuration
includes options for assigning a suffix to each consumer group; see `consumerGroupSuffix`.

| Environment Variable | Default | Description |
|------------------------------------------------|---------|---------------------------------------------------------------------------------------------|
| SIBLINGS_HOOK_CONSUMER_GROUP_SUFFIX | '' | Siblings processing hook. Considered one of the primary hooks in the `datahub-mae-consumer` |
| UPDATE_INDICES_CONSUMER_GROUP_SUFFIX | '' | Primary processing hook. |
| INGESTION_SCHEDULER_HOOK_CONSUMER_GROUP_SUFFIX | '' | Scheduled ingestion hook. |
| INCIDENTS_HOOK_CONSUMER_GROUP_SUFFIX | '' | Incidents hook. |
| ECE_CONSUMER_GROUP_SUFFIX | '' | Entity Change Event hook which publishes to the Platform Events topic. |
| FORMS_HOOK_CONSUMER_GROUP_SUFFIX | '' | Forms processing. |
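
As a concrete illustration (assuming the default `METADATA_CHANGE_LOG_KAFKA_CONSUMER_GROUP_ID` of `generic-mae-consumer-job-client`), setting `UPDATE_INDICES_CONSUMER_GROUP_SUFFIX=update-indices` would run the `UpdateIndicesHook` under its own consumer group, `generic-mae-consumer-job-client-update-indices`, while hooks with an empty suffix continue to share the base group. The suffix is joined to the base group id with a hyphen; see `buildConsumerGroupName` in the `MCLKafkaListenerRegistrar` diff below.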

## Applying Configurations

### Docker
@@ -18,8 +18,6 @@
"com.linkedin.metadata.service",
"com.datahub.event",
"com.linkedin.gms.factory.kafka",
"com.linkedin.gms.factory.kafka.common",
"com.linkedin.gms.factory.kafka.schemaregistry",
"com.linkedin.metadata.boot.kafka",
"com.linkedin.metadata.kafka",
"com.linkedin.metadata.dao.producer",
@@ -34,7 +32,10 @@
"com.linkedin.gms.factory.context",
"com.linkedin.gms.factory.timeseries",
"com.linkedin.gms.factory.assertion",
"com.linkedin.gms.factory.plugins"
"com.linkedin.gms.factory.plugins",
"com.linkedin.gms.factory.change",
"com.datahub.event.hook",
"com.linkedin.gms.factory.notifications"
},
excludeFilters = {
@ComponentScan.Filter(
MCLKafkaListener.java (new file)
@@ -0,0 +1,103 @@
package com.linkedin.metadata.kafka;

import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.linkedin.metadata.EventUtils;
import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook;
import com.linkedin.metadata.utils.metrics.MetricUtils;
import com.linkedin.mxe.MetadataChangeLog;
import io.datahubproject.metadata.context.OperationContext;
import java.util.List;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.consumer.ConsumerRecord;

@Slf4j
public class MCLKafkaListener {
private static final Histogram kafkaLagStats =
MetricUtils.get()
.histogram(
MetricRegistry.name(
"com.linkedin.metadata.kafka.MetadataChangeLogProcessor", "kafkaLag"));

private final String consumerGroupId;
private final List<MetadataChangeLogHook> hooks;

public MCLKafkaListener(
OperationContext systemOperationContext,
String consumerGroup,
List<MetadataChangeLogHook> hooks) {
this.consumerGroupId = consumerGroup;
this.hooks = hooks;
this.hooks.forEach(hook -> hook.init(systemOperationContext));

log.info(
"Enabled MCL Hooks - Group: {} Hooks: {}",
consumerGroup,
hooks.stream().map(hook -> hook.getClass().getSimpleName()).collect(Collectors.toList()));
}

public void consume(final ConsumerRecord<String, GenericRecord> consumerRecord) {
try (Timer.Context i = MetricUtils.timer(this.getClass(), "consume").time()) {
kafkaLagStats.update(System.currentTimeMillis() - consumerRecord.timestamp());
final GenericRecord record = consumerRecord.value();
log.debug(
"Got MCL event consumer: {} key: {}, topic: {}, partition: {}, offset: {}, value size: {}, timestamp: {}",
consumerGroupId,
consumerRecord.key(),
consumerRecord.topic(),
consumerRecord.partition(),
consumerRecord.offset(),
consumerRecord.serializedValueSize(),
consumerRecord.timestamp());
MetricUtils.counter(this.getClass(), consumerGroupId + "_received_mcl_count").inc();

MetadataChangeLog event;
try {
event = EventUtils.avroToPegasusMCL(record);
} catch (Exception e) {
MetricUtils.counter(
this.getClass(), consumerGroupId + "_avro_to_pegasus_conversion_failure")
.inc();
log.error("Error deserializing message due to: ", e);
log.error("Message: {}", record.toString());
return;
}

log.info(
"Invoking MCL hooks for consumer: {} urn: {}, aspect name: {}, entity type: {}, change type: {}",
consumerGroupId,
event.getEntityUrn(),
event.hasAspectName() ? event.getAspectName() : null,
event.hasEntityType() ? event.getEntityType() : null,
event.hasChangeType() ? event.getChangeType() : null);

// Here - plug in additional "custom processor hooks"
for (MetadataChangeLogHook hook : this.hooks) {
log.info(
"Invoking MCL hook {} for urn: {}",
hook.getClass().getSimpleName(),
event.getEntityUrn());
try (Timer.Context ignored =
MetricUtils.timer(this.getClass(), hook.getClass().getSimpleName() + "_latency")
.time()) {
hook.invoke(event);
} catch (Exception e) {
// Just skip this hook and continue. Note that this represents "at most once"
// processing.
MetricUtils.counter(this.getClass(), hook.getClass().getSimpleName() + "_failure").inc();
log.error(
"Failed to execute MCL hook with name {}", hook.getClass().getCanonicalName(), e);
}
}
// TODO: Manually commit kafka offsets after full processing.
Contributor

Consider implementing manual offset commits.

The TODO comment suggests implementing manual offset commits after full processing. This can ensure that messages are only marked as processed after successful execution of all hooks, reducing the risk of message loss.

Would you like assistance in implementing manual offset commits?
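
As a rough sketch of that direction (not part of this PR, and assuming the existing `kafkaEventConsumer` container factory can be switched to a manual ack mode), Spring Kafka supports manual commits via an `Acknowledgment` parameter on the listener method:

import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory;
import org.springframework.kafka.listener.ContainerProperties;
import org.springframework.kafka.support.Acknowledgment;

class ManualAckSketch {
  // Offsets are committed only when the listener calls acknowledge().
  static void configure(ConcurrentKafkaListenerContainerFactory<String, GenericRecord> factory) {
    factory.getContainerProperties().setAckMode(ContainerProperties.AckMode.MANUAL_IMMEDIATE);
  }

  // The registrar's getMethod lookup would add Acknowledgment.class as a second parameter type.
  public void consume(ConsumerRecord<String, GenericRecord> consumerRecord, Acknowledgment ack) {
    // ... deserialize the record and invoke all hooks, as in MCLKafkaListener.consume ...
    // Acknowledging only after every hook has run moves the current
    // "at most once" behavior toward "at least once" across restarts.
    ack.acknowledge();
  }
}

With redelivery in the picture, hooks would need to tolerate reprocessing the same MetadataChangeLog event, so they would effectively need to be idempotent.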

MetricUtils.counter(this.getClass(), consumerGroupId + "_consumed_mcl_count").inc();
log.info(
"Successfully completed MCL hooks for consumer: {} urn: {}",
consumerGroupId,
event.getEntityUrn());
}
}
}
MCLKafkaListenerRegistrar.java (new file)
@@ -0,0 +1,120 @@
package com.linkedin.metadata.kafka;

import com.linkedin.metadata.kafka.config.MetadataChangeLogProcessorCondition;
import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook;
import com.linkedin.mxe.Topics;
import io.datahubproject.metadata.context.OperationContext;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Conditional;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.config.KafkaListenerContainerFactory;
import org.springframework.kafka.config.KafkaListenerEndpoint;
import org.springframework.kafka.config.KafkaListenerEndpointRegistry;
import org.springframework.kafka.config.MethodKafkaListenerEndpoint;
import org.springframework.messaging.handler.annotation.support.DefaultMessageHandlerMethodFactory;
import org.springframework.stereotype.Component;

@Slf4j
@EnableKafka
@Component
@Conditional(MetadataChangeLogProcessorCondition.class)
public class MCLKafkaListenerRegistrar implements InitializingBean {

@Autowired
@Qualifier("systemOperationContext")
private OperationContext systemOperationContext;

@Autowired private KafkaListenerEndpointRegistry kafkaListenerEndpointRegistry;

@Autowired
@Qualifier("kafkaEventConsumer")
private KafkaListenerContainerFactory<?> kafkaListenerContainerFactory;

@Value("${METADATA_CHANGE_LOG_KAFKA_CONSUMER_GROUP_ID:generic-mae-consumer-job-client}")
private String consumerGroupBase;

@Value("${METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_VERSIONED + "}")
private String mclVersionedTopicName;

@Value(
"${METADATA_CHANGE_LOG_TIMESERIES_TOPIC_NAME:" + Topics.METADATA_CHANGE_LOG_TIMESERIES + "}")
private String mclTimeseriesTopicName;

@Autowired private List<MetadataChangeLogHook> metadataChangeLogHooks;

@Override
public void afterPropertiesSet() {
Map<String, List<MetadataChangeLogHook>> hookGroups =
getMetadataChangeLogHooks().stream()
.collect(Collectors.groupingBy(MetadataChangeLogHook::getConsumerGroupSuffix));

log.info(
"MetadataChangeLogProcessor Consumer Groups: {}",
hookGroups.keySet().stream().map(this::buildConsumerGroupName).collect(Collectors.toSet()));

hookGroups.forEach(
(key, hooks) -> {
KafkaListenerEndpoint kafkaListenerEndpoint =
createListenerEndpoint(
buildConsumerGroupName(key),
List.of(mclVersionedTopicName, mclTimeseriesTopicName),
hooks);
registerMCLKafkaListener(kafkaListenerEndpoint, true);
});
}
Comment on lines +58 to +76
Contributor

Consider Error Handling for Hook Registration.

The afterPropertiesSet method registers Kafka listeners for each consumer group. Consider adding error handling to manage potential issues during listener registration, such as invalid configurations or connectivity problems.

try {
  registerMCLKafkaListener(kafkaListenerEndpoint, true);
} catch (Exception e) {
  log.error("Failed to register Kafka listener for consumer group: {}", key, e);
}
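
For context on the current behavior: `registerMCLKafkaListener` is annotated with `@SneakyThrows`, so a registration failure propagates and aborts application startup rather than being logged and skipped; the suggested try/catch would trade that fail-fast behavior for partial startup with a logged error.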


public List<MetadataChangeLogHook> getMetadataChangeLogHooks() {
return metadataChangeLogHooks.stream()
.filter(MetadataChangeLogHook::isEnabled)
.sorted(Comparator.comparing(MetadataChangeLogHook::executionOrder))
.toList();
}

@SneakyThrows
public void registerMCLKafkaListener(
KafkaListenerEndpoint kafkaListenerEndpoint, boolean startImmediately) {
kafkaListenerEndpointRegistry.registerListenerContainer(
kafkaListenerEndpoint, kafkaListenerContainerFactory, startImmediately);
}

private KafkaListenerEndpoint createListenerEndpoint(
String consumerGroupId, List<String> topics, List<MetadataChangeLogHook> hooks) {
MethodKafkaListenerEndpoint<String, GenericRecord> kafkaListenerEndpoint =
new MethodKafkaListenerEndpoint<>();
kafkaListenerEndpoint.setId(consumerGroupId);
kafkaListenerEndpoint.setGroupId(consumerGroupId);
kafkaListenerEndpoint.setAutoStartup(true);
kafkaListenerEndpoint.setTopics(topics.toArray(new String[topics.size()]));
kafkaListenerEndpoint.setMessageHandlerMethodFactory(new DefaultMessageHandlerMethodFactory());
kafkaListenerEndpoint.setBean(
new MCLKafkaListener(systemOperationContext, consumerGroupId, hooks));
try {
kafkaListenerEndpoint.setMethod(
MCLKafkaListener.class.getMethod("consume", ConsumerRecord.class));
} catch (NoSuchMethodException e) {
throw new RuntimeException(e);
}

return kafkaListenerEndpoint;
}

private String buildConsumerGroupName(@Nonnull String suffix) {
if (suffix.isEmpty()) {
return consumerGroupBase;
} else {
return String.join("-", consumerGroupBase, suffix);
}
}
}
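
To illustrate how the registrar groups hooks, here is a hypothetical custom hook (not part of this PR; the method signatures are inferred from their usage in MCLKafkaListener and the registrar above, and may not match the MetadataChangeLogHook interface exactly):

import com.linkedin.metadata.kafka.hook.MetadataChangeLogHook;
import com.linkedin.mxe.MetadataChangeLog;
import io.datahubproject.metadata.context.OperationContext;
import org.springframework.stereotype.Component;

// Must be declared in a component-scanned package so that it is picked up by
// the registrar's autowired List<MetadataChangeLogHook>.
@Component
public class AuditLogHook implements MetadataChangeLogHook {

  @Override
  public void init(OperationContext systemOperationContext) {
    // One-time setup; invoked from the MCLKafkaListener constructor.
  }

  @Override
  public boolean isEnabled() {
    return true; // Disabled hooks are filtered out before grouping.
  }

  @Override
  public int executionOrder() {
    return 100; // Hooks are sorted by this value within their group.
  }

  @Override
  public String getConsumerGroupSuffix() {
    // A non-empty suffix places this hook in its own consumer group, e.g.
    // "generic-mae-consumer-job-client-audit-log" with the default base id.
    return "audit-log";
  }

  @Override
  public void invoke(MetadataChangeLog event) {
    // React to the metadata change event here.
  }
}

Hooks returning an empty suffix share the base consumer group and run serially within a single listener, so giving a slow hook its own suffix isolates its lag from the primary hooks.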