From 8479d2f6abdebb2df4f7de20256be554d1e0477e Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Mon, 26 Aug 2024 14:58:33 +0530 Subject: [PATCH] fix(analytics): index description so analytics are correct (#11224) --- .../ReindexDomainDescriptionConfig.java | 29 +++++++++++ .../ReindexDomainDescription.java | 49 +++++++++++++++++++ .../ReindexDomainDescriptionStep.java | 42 ++++++++++++++++ .../com/linkedin/domain/DomainProperties.pdl | 4 ++ .../src/main/resources/application.yaml | 5 ++ 5 files changed, 129 insertions(+) create mode 100644 datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java create mode 100644 datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java create mode 100644 datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java new file mode 100644 index 0000000000000..3cdab0dc4d4bc --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDomainDescriptionConfig.java @@ -0,0 +1,29 @@ +package com.linkedin.datahub.upgrade.config; + +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.datahub.upgrade.system.domaindescription.ReindexDomainDescription; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Conditional; +import org.springframework.context.annotation.Configuration; + +@Configuration +@Conditional(SystemUpdateCondition.NonBlockingSystemUpdateCondition.class) +public class ReindexDomainDescriptionConfig { + + @Bean + public NonBlockingSystemUpgrade reindexDomainDescription( + final OperationContext opContext, + final EntityService entityService, + final AspectDao aspectDao, + @Value("${systemUpdate.domainDescription.enabled}") final boolean enabled, + @Value("${systemUpdate.domainDescription.batchSize}") final Integer batchSize, + @Value("${systemUpdate.domainDescription.delayMs}") final Integer delayMs, + @Value("${systemUpdate.domainDescription.limit}") final Integer limit) { + return new ReindexDomainDescription( + opContext, entityService, aspectDao, enabled, batchSize, delayMs, limit); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java new file mode 100644 index 0000000000000..85af912e24f68 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescription.java @@ -0,0 +1,49 @@ +package com.linkedin.datahub.upgrade.system.domaindescription; + +import com.google.common.collect.ImmutableList; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +/** + * A job that reindexes all domain aspects as part of reindexing descriptions This is required to + * fix the analytics for domains + */ +@Slf4j +public class ReindexDomainDescription implements NonBlockingSystemUpgrade { + + private final List _steps; + + public ReindexDomainDescription( + @Nonnull OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + boolean enabled, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + if (enabled) { + _steps = + ImmutableList.of( + new ReindexDomainDescriptionStep( + opContext, entityService, aspectDao, batchSize, batchDelayMs, limit)); + } else { + _steps = ImmutableList.of(); + } + } + + @Override + public String id() { + return this.getClass().getName(); + } + + @Override + public List steps() { + return _steps; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java new file mode 100644 index 0000000000000..1fa8bc92af078 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/domaindescription/ReindexDomainDescriptionStep.java @@ -0,0 +1,42 @@ +package com.linkedin.datahub.upgrade.system.domaindescription; + +import static com.linkedin.metadata.Constants.*; + +import com.linkedin.datahub.upgrade.system.AbstractMCLStep; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.jetbrains.annotations.Nullable; + +@Slf4j +public class ReindexDomainDescriptionStep extends AbstractMCLStep { + + public ReindexDomainDescriptionStep( + OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + super(opContext, entityService, aspectDao, batchSize, batchDelayMs, limit); + } + + @Override + public String id() { + return "domain-description-v1"; + } + + @Nonnull + @Override + protected String getAspectName() { + return DOMAIN_PROPERTIES_ASPECT_NAME; + } + + @Nullable + @Override + protected String getUrnLike() { + return "urn:li:" + DOMAIN_ENTITY_NAME + ":%"; + } +} diff --git a/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl index eb307b726855d..2d93d9399bf72 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/domain/DomainProperties.pdl @@ -26,6 +26,10 @@ record DomainProperties includes CustomProperties { /** * Description of the Domain */ + @Searchable = { + "fieldType": "TEXT", + "hasValuesFieldName": "hasDescription" + } description: optional string /** diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index 5b3673ddca52c..3f1c6dd1a3d7d 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -335,6 +335,11 @@ systemUpdate: batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_BATCH_SIZE:1000} delayMs: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_DELAY_MS:30000} limit: ${BOOTSTRAP_SYSTEM_UPDATE_DATA_JOB_NODE_CLL_LIMIT:0} + domainDescription: + enabled: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_ENABLED:true} + batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_BATCH_SIZE:1000} + delayMs: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_DELAY_MS:30000} + limit: ${BOOTSTRAP_SYSTEM_UPDATE_DOMAIN_DESCRIPTION_CLL_LIMIT:0} browsePathsV2: enabled: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_ENABLED:true} batchSize: ${BOOTSTRAP_SYSTEM_UPDATE_BROWSE_PATHS_V2_BATCH_SIZE:5000}