diff --git a/charts/datahub/Chart.yaml b/charts/datahub/Chart.yaml index 7dc325c75..49866adc8 100644 --- a/charts/datahub/Chart.yaml +++ b/charts/datahub/Chart.yaml @@ -4,13 +4,13 @@ description: A Helm chart for LinkedIn DataHub type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.164 +version: 0.2.165 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: 0.10.2 +appVersion: 0.10.3 dependencies: - name: datahub-gms - version: 0.2.147 + version: 0.2.148 repository: file://./subcharts/datahub-gms condition: datahub-gms.enabled - name: datahub-frontend @@ -18,11 +18,11 @@ dependencies: repository: file://./subcharts/datahub-frontend condition: datahub-frontend.enabled - name: datahub-mae-consumer - version: 0.2.143 + version: 0.2.144 repository: file://./subcharts/datahub-mae-consumer condition: global.datahub_standalone_consumers_enabled - name: datahub-mce-consumer - version: 0.2.145 + version: 0.2.146 repository: file://./subcharts/datahub-mce-consumer condition: global.datahub_standalone_consumers_enabled - name: datahub-ingestion-cron @@ -30,7 +30,7 @@ dependencies: repository: file://./subcharts/datahub-ingestion-cron condition: datahub-ingestion-cron.enabled - name: acryl-datahub-actions - version: 0.2.136 + version: 0.2.137 repository: file://./subcharts/acryl-datahub-actions condition: acryl-datahub-actions.enabled maintainers: diff --git a/charts/datahub/README.md b/charts/datahub/README.md index 8a3e16bda..40a2dcc61 100644 --- a/charts/datahub/README.md +++ b/charts/datahub/README.md @@ -95,7 +95,7 @@ helm install datahub datahub/datahub --values <> | global.kafka.topics.metadata_change_log_versioned_topic_name | string | `"MetadataChangeLog_Versioned_v1"` | Kafka topic name for Versioned Metadata Change Log 
events | | global.kafka.topics.metadata_change_log_timeseries_topic_name | string | `"MetadataChangeLog_Timeseries_v1"` | Kafka topic name for Timeseries Metadata Change Log events | | global.kafka.topics.platform_event_topic_name | string | `"PlatformEvent_v1"` | Kafka topic name for Platform events | -| global.kafka.schemaregistry.url | string | `"http://prerequisites-cp-schema-registry:8081"` | URL to kafka schema registry | +| global.kafka.schemaregistry.url | string | `""` | URL to kafka schema registry (only used when `global.kafka.schemaregistry.type` is `KAFKA`) | | global.neo4j.host | string | `"prerequisites-neo4j:7474"` | Neo4j host address (with port) | | global.neo4j.uri | string | `"bolt://prerequisites-neo4j"` | Neo4j URI | | global.neo4j.username | string | `"neo4j"` | Neo4j user name | @@ -139,7 +139,7 @@ helm install datahub datahub/datahub --values <> | global.elasticsearch.auth.password.secretRef | string | `""` | Secret that contains the elasticsearch password | | global.elasticsearch.auth.password.secretKey | string | `""` | Secret key that contains the elasticsearch password | | global.elasticsearch.auth.password.value | string | `""` | Alternative to using the secret above, uses raw string value instead | -| global.kafka.schemaregistry.type | string | `"KAFKA"` | Type of schema registry (KAFKA or AWS_GLUE) | +| global.kafka.schemaregistry.type | string | `"INTERNAL"` | Type of schema registry (INTERNAL, KAFKA, or AWS_GLUE) | | global.kafka.schemaregistry.glue.region | string | `""` | Region of the AWS Glue schema registry | | global.kafka.schemaregistry.glue.registry | string | `""` | Name of the AWS Glue schema registry | | datahub.metadata_service_authentication.enabled | bool | `false` | Whether Metadata Service Authentication is enabled.
| diff --git a/charts/datahub/subcharts/acryl-datahub-actions/Chart.yaml b/charts/datahub/subcharts/acryl-datahub-actions/Chart.yaml index 0c0e1a588..d2e46f93b 100644 --- a/charts/datahub/subcharts/acryl-datahub-actions/Chart.yaml +++ b/charts/datahub/subcharts/acryl-datahub-actions/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.136 +version: 0.2.137 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. appVersion: 0.0.11 diff --git a/charts/datahub/subcharts/acryl-datahub-actions/templates/deployment.yaml b/charts/datahub/subcharts/acryl-datahub-actions/templates/deployment.yaml index aa1f79981..071916d55 100644 --- a/charts/datahub/subcharts/acryl-datahub-actions/templates/deployment.yaml +++ b/charts/datahub/subcharts/acryl-datahub-actions/templates/deployment.yaml @@ -73,8 +73,13 @@ spec: value: "{{ .Values.global.datahub.gms.port }}" - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" + {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} + - name: SCHEMA_REGISTRY_URL + value: {{ printf "http://%s-%s:%v/schema-registry/api/" .Release.Name "datahub-gms" .Values.global.datahub.gms.port | quote }} + {{- else if eq .Values.global.kafka.schemaregistry.type "KAFKA" }} - name: SCHEMA_REGISTRY_URL value: "{{ .Values.global.kafka.schemaregistry.url }}" + {{- end }} - name: KAFKA_AUTO_OFFSET_POLICY value: "{{ .Values.actions.kafkaAutoOffsetPolicy }}" {{- if .Values.global.springKafkaConfigurationOverrides }} diff --git a/charts/datahub/subcharts/datahub-gms/Chart.yaml b/charts/datahub/subcharts/datahub-gms/Chart.yaml index ae513efc1..67cfa2aa4 100644 --- a/charts/datahub/subcharts/datahub-gms/Chart.yaml +++
b/charts/datahub/subcharts/datahub-gms/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for LinkedIn DataHub's datahub-gms component type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.147 +version: 0.2.148 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. appVersion: v0.10.0 diff --git a/charts/datahub/subcharts/datahub-gms/templates/configmap.yaml b/charts/datahub/subcharts/datahub-gms/templates/configmap.yaml new file mode 100644 index 000000000..1565e7a19 --- /dev/null +++ b/charts/datahub/subcharts/datahub-gms/templates/configmap.yaml @@ -0,0 +1,14 @@ +{{- if .Values.global.elasticsearch.search.custom.enabled -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ printf "%s-%s" .Release.Name "search-custom" }} + labels: + {{- include "datahub-gms.labels" . | nindent 4 }} + {{- range $key, $val := .Values.extraLabels }} + {{ $key }}: {{ $val | quote }} + {{- end }} +data: + search_config.yml: | + {{- toYaml .Values.global.elasticsearch.search.custom.config | nindent 4 }} +{{- end }} diff --git a/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml b/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml index 4a9be56b7..190dcf20e 100644 --- a/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml +++ b/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml @@ -47,6 +47,11 @@ spec: defaultMode: 0444 secretName: {{ .name }} {{- end }} + {{- if .Values.global.elasticsearch.search.custom.enabled }} + - configMap: + name: {{ printf "%s-%s" .Release.Name "search-custom" }} + name: search-config + {{- end }} {{- with .Values.extraVolumes }} {{- toYaml .
| nindent 8 }} {{- end }} @@ -89,6 +94,12 @@ spec: periodSeconds: {{ .Values.readinessProbe.periodSeconds }} failureThreshold: {{ .Values.readinessProbe.failureThreshold }} env: + {{- if .Values.global.elasticsearch.search.custom.enabled }} + - name: ELASTICSEARCH_QUERY_CUSTOM_CONFIG_ENABLED + value: "true" + - name: ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE + value: "/datahub/datahub-gms/resources/search/search_config.yml" + {{- end }} {{- if gt .Values.replicaCount 1.0}} - name: SEARCH_SERVICE_CACHE_IMPLEMENTATION value: "hazelcast" @@ -137,9 +148,12 @@ spec: value: "{{ .Values.global.sql.datasource.driver }}" - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" - {{- with .Values.global.kafka.schemaregistry.url }} + {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} - name: KAFKA_SCHEMAREGISTRY_URL - value: "{{ . }}" + value: {{ printf "http://localhost:%v/schema-registry/api/" .Values.global.datahub.gms.port | quote }} + {{- else if eq .Values.global.kafka.schemaregistry.type "KAFKA" }} + - name: KAFKA_SCHEMAREGISTRY_URL + value: "{{ .Values.global.kafka.schemaregistry.url }}" {{- end }} {{- with .Values.global.kafka.schemaregistry.type }} - name: SCHEMA_REGISTRY_TYPE @@ -331,6 +345,10 @@ spec: - name: datahub-certs-dir mountPath: {{ .path | default "/mnt/certs" }} {{- end }} + {{- if .Values.global.elasticsearch.search.custom.enabled }} + - name: search-config + mountPath: "/datahub/datahub-gms/resources/search" + {{- end }} {{- with .Values.extraVolumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} diff --git a/charts/datahub/subcharts/datahub-gms/values.yaml b/charts/datahub/subcharts/datahub-gms/values.yaml index fc06079fd..1056df635 100644 --- a/charts/datahub/subcharts/datahub-gms/values.yaml +++ b/charts/datahub/subcharts/datahub-gms/values.yaml @@ -200,7 +200,7 @@ global: enable_retention: false ## Set to true to enable retention on local DB ## Enables always emitting a MCL even when no changes are detected.
Used for Time Based Lineage when no changes occur. - alwaysEmitChangeLog: true + alwaysEmitChangeLog: false ## Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and reading enableGraphDiffMode: true diff --git a/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml b/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml index fd7d46e1f..0fdbf6b62 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.143 +version: 0.2.144 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. appVersion: v0.10.0 diff --git a/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml b/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml index 4b6e1ad3d..c069074b8 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml @@ -102,9 +102,12 @@ spec: value: "{{ .Values.global.datahub.gms.port }}" - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" - {{- with .Values.global.kafka.schemaregistry.url }} + {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} - name: KAFKA_SCHEMAREGISTRY_URL - value: "{{ . 
}}" + value: {{ printf "http://%s-%s:%v/schema-registry/api/" .Release.Name "datahub-gms" .Values.global.datahub.gms.port | quote }} + {{- else if eq .Values.global.kafka.schemaregistry.type "KAFKA" }} + - name: KAFKA_SCHEMAREGISTRY_URL + value: "{{ .Values.global.kafka.schemaregistry.url }}" {{- end }} {{- with .Values.global.kafka.schemaregistry.type }} - name: SCHEMA_REGISTRY_TYPE diff --git a/charts/datahub/subcharts/datahub-mae-consumer/values.yaml b/charts/datahub/subcharts/datahub-mae-consumer/values.yaml index 2a2b91ea9..f9104089c 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/values.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/values.yaml @@ -207,7 +207,7 @@ global: # secretKey: ## Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. - alwaysEmitChangeLog: true + alwaysEmitChangeLog: false ## Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and reading enableGraphDiffMode: true diff --git a/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml b/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml index 241d0827d..85eeabf88 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.145 +version: 0.2.146 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application.
appVersion: v0.10.0 diff --git a/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml b/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml index 1fea2719a..dff8a2d58 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml @@ -98,9 +98,12 @@ spec: value: "true" - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" - {{- with .Values.global.kafka.schemaregistry.url }} + {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} - name: KAFKA_SCHEMAREGISTRY_URL - value: "{{ . }}" + value: {{ printf "http://%s-%s:%v/schema-registry/api/" .Release.Name "datahub-gms" .Values.global.datahub.gms.port | quote }} + {{- else if eq .Values.global.kafka.schemaregistry.type "KAFKA" }} + - name: KAFKA_SCHEMAREGISTRY_URL + value: "{{ .Values.global.kafka.schemaregistry.url }}" {{- end }} {{- with .Values.global.kafka.schemaregistry.type }} - name: SCHEMA_REGISTRY_TYPE diff --git a/charts/datahub/subcharts/datahub-mce-consumer/values.yaml b/charts/datahub/subcharts/datahub-mce-consumer/values.yaml index 75769726a..892b391d2 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/values.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/values.yaml @@ -200,7 +200,7 @@ global: secretKey: "mysql-password" ## Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur.
- alwaysEmitChangeLog: true + alwaysEmitChangeLog: false ## Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and reading enableGraphDiffMode: true diff --git a/charts/datahub/templates/datahub-upgrade/_upgrade.tpl b/charts/datahub/templates/datahub-upgrade/_upgrade.tpl index 8d784f50e..eec1df561 100644 --- a/charts/datahub/templates/datahub-upgrade/_upgrade.tpl +++ b/charts/datahub/templates/datahub-upgrade/_upgrade.tpl @@ -42,8 +42,13 @@ Return the env variables for upgrade jobs {{- end }} - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" +{{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} +- name: KAFKA_SCHEMAREGISTRY_URL + value: {{ printf "http://%s-%s:%v/schema-registry/api/" .Release.Name "datahub-gms" .Values.global.datahub.gms.port | quote }} +{{- else if eq .Values.global.kafka.schemaregistry.type "KAFKA" }} - name: KAFKA_SCHEMAREGISTRY_URL value: "{{ .Values.global.kafka.schemaregistry.url }}" +{{- end }} - name: ELASTICSEARCH_HOST value: {{ .Values.global.elasticsearch.host | quote }} - name: ELASTICSEARCH_PORT diff --git a/charts/datahub/templates/datahub-upgrade/datahub-system-update-job.yml b/charts/datahub/templates/datahub-upgrade/datahub-system-update-job.yml index 29786df8e..54a946ad2 100644 --- a/charts/datahub/templates/datahub-upgrade/datahub-system-update-job.yml +++ b/charts/datahub/templates/datahub-upgrade/datahub-system-update-job.yml @@ -71,6 +71,14 @@ spec: {{- include "datahub.upgrade.env" .
| nindent 12}} - name: DATAHUB_ANALYTICS_ENABLED value: {{ .Values.global.datahub_analytics_enabled | quote }} + {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} + - name: SCHEMA_REGISTRY_SYSTEM_UPDATE + value: "true" + - name: SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS + value: "false" + - name: SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION + value: "true" + {{- end }} {{- with .Values.global.kafka.schemaregistry.type }} - name: SCHEMA_REGISTRY_TYPE value: "{{ . }}" diff --git a/charts/datahub/templates/kafka-setup-job.yml b/charts/datahub/templates/kafka-setup-job.yml index 86bc43980..100dd9dec 100644 --- a/charts/datahub/templates/kafka-setup-job.yml +++ b/charts/datahub/templates/kafka-setup-job.yml @@ -63,6 +63,13 @@ spec: value: {{ .Values.global.kafka.zookeeper.server | quote }} - name: KAFKA_BOOTSTRAP_SERVER value: {{ .Values.global.kafka.bootstrap.server | quote }} + {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} + - name: USE_CONFLUENT_SCHEMA_REGISTRY + value: "false" + {{- else if eq .Values.global.kafka.schemaregistry.type "KAFKA" }} + - name: USE_CONFLUENT_SCHEMA_REGISTRY + value: "true" + {{- end }} {{- if .Values.global.springKafkaConfigurationOverrides }} {{- range $configName, $configValue := .Values.global.springKafkaConfigurationOverrides }} - name: KAFKA_PROPERTIES_{{ $configName | replace "." 
"_" | upper }} diff --git a/charts/datahub/values.yaml b/charts/datahub/values.yaml index be6f7d7b5..dddea2988 100644 --- a/charts/datahub/values.yaml +++ b/charts/datahub/values.yaml @@ -333,6 +333,81 @@ global: ## graph dao max result size maxResult: 10000 + custom: + enabled: false + # See documentation: https://datahubproject.io/docs/how/search/#customizing-search + config: + # Notes: + # + # First match wins + # + # queryRegex = Java regex syntax + # + # functionScores - See the following for function score syntax + # https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-function-score-query.html + + queryConfigurations: + # Select * + - queryRegex: '[*]|' + simpleQuery: false + prefixMatchQuery: false + exactMatchQuery: false + boolQuery: + must_not: + term: + deprecated: + value: true + functionScore: + functions: + - filter: + term: + materialized: + value: true + weight: 0.8 + score_mode: multiply + boost_mode: multiply + + # Criteria for exact-match only + # Contains quoted or contains underscore then use exact match query + - queryRegex: >- + ["'].+["']|\S+_\S+ + simpleQuery: false + prefixMatchQuery: true + exactMatchQuery: true + functionScore: + functions: + - filter: + term: + materialized: + value: true + weight: 0.8 + - filter: + term: + deprecated: + value: true + weight: 0 + score_mode: multiply + boost_mode: multiply + # default + - queryRegex: .* + simpleQuery: true + prefixMatchQuery: true + exactMatchQuery: true + boolQuery: + must_not: + term: + deprecated: + value: true + functionScore: + functions: + - filter: + term: + materialized: + value: true + weight: 0.8 + score_mode: multiply + boost_mode: multiply + kafka: bootstrap: server: "prerequisites-kafka:9092" @@ -355,8 +430,14 @@ global: # partitions: 3 # replicationFactor: 3 schemaregistry: - url: "http://prerequisites-cp-schema-registry:8081" - type: KAFKA + # GMS Implementation - `url` configured based on component context + type: INTERNAL + # Confluent Kafka 
Implementation + # type: KAFKA + # url: "http://prerequisites-cp-schema-registry:8081" + + # Glue Implementation - `url` not applicable + # type: AWS_GLUE # glue: # region: us-east-1 # registry: datahub @@ -397,15 +478,15 @@ global: # secretKey: postgres-password # --------------OR---------------- # value: password - + # If you want to use specific PostgreSQL database use extraEnvs # extraEnvs: # - name: "DATAHUB_DB_NAME" - # value: "dh" + # value: "dh" datahub: - version: v0.10.2 + version: v0.10.3 gms: port: "8080" nodePort: "30001" @@ -436,7 +517,7 @@ global: managed_ingestion: enabled: true - defaultCliVersion: "0.10.0" + defaultCliVersion: "0.10.3" metadata_service_authentication: enabled: false @@ -462,7 +543,7 @@ global: # salt: ## Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. - alwaysEmitChangeLog: true + alwaysEmitChangeLog: false ## Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and reading enableGraphDiffMode: true diff --git a/charts/prerequisites/Chart.yaml b/charts/prerequisites/Chart.yaml index 1fbf1443e..69c5871b9 100644 --- a/charts/prerequisites/Chart.yaml +++ b/charts/prerequisites/Chart.yaml @@ -4,7 +4,7 @@ description: A Helm chart for packages that Datahub depends on type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. 
-version: 0.0.15 +version: 0.0.16 dependencies: - name: elasticsearch version: 7.17.3 diff --git a/charts/prerequisites/values.yaml b/charts/prerequisites/values.yaml index 1968f5481..43d57c373 100644 --- a/charts/prerequisites/values.yaml +++ b/charts/prerequisites/values.yaml @@ -66,9 +66,10 @@ postgresql: existingSecret: postgresql-secrets cp-helm-charts: + enabled: false # Schema registry is under the community license cp-schema-registry: - enabled: true + enabled: false kafka: bootstrapServers: "prerequisites-kafka:9092" # <>-kafka:9092 cp-kafka: