diff --git a/charts/datahub/Chart.yaml b/charts/datahub/Chart.yaml index c4bde7ae1..107f0d5ab 100644 --- a/charts/datahub/Chart.yaml +++ b/charts/datahub/Chart.yaml @@ -4,13 +4,13 @@ description: A Helm chart for LinkedIn DataHub type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.157 +version: 0.2.158 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. appVersion: 0.10.0 dependencies: - name: datahub-gms - version: 0.2.144 + version: 0.2.145 repository: file://./subcharts/datahub-gms condition: datahub-gms.enabled - name: datahub-frontend @@ -18,11 +18,11 @@ dependencies: repository: file://./subcharts/datahub-frontend condition: datahub-frontend.enabled - name: datahub-mae-consumer - version: 0.2.141 + version: 0.2.142 repository: file://./subcharts/datahub-mae-consumer condition: global.datahub_standalone_consumers_enabled - name: datahub-mce-consumer - version: 0.2.143 + version: 0.2.144 repository: file://./subcharts/datahub-mce-consumer condition: global.datahub_standalone_consumers_enabled - name: datahub-ingestion-cron diff --git a/charts/datahub/subcharts/datahub-gms/Chart.yaml b/charts/datahub/subcharts/datahub-gms/Chart.yaml index e833e9244..c33ac7ff0 100644 --- a/charts/datahub/subcharts/datahub-gms/Chart.yaml +++ b/charts/datahub/subcharts/datahub-gms/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for LinkedIn DataHub's datahub-gms component type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.144 +version: 0.2.145 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
appVersion: v0.10.0 diff --git a/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml b/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml index 62d47ec0c..e528bba7e 100644 --- a/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml +++ b/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml @@ -273,8 +273,44 @@ spec: - name: ENTITY_SERVICE_ENABLE_RETENTION value: "true" {{- end }} + - name: ELASTICSEARCH_QUERY_MAX_TERM_BUCKET_SIZE + value: {{ .Values.global.elasticsearch.search.maxTermBucketSize | quote }} + - name: ELASTICSEARCH_QUERY_EXACT_MATCH_EXCLUSIVE + value: {{ .Values.global.elasticsearch.search.exactMatch.exclusive | quote }} + - name: ELASTICSEARCH_QUERY_EXACT_MATCH_WITH_PREFIX + value: {{ .Values.global.elasticsearch.search.exactMatch.withPrefix | quote }} + - name: ELASTICSEARCH_QUERY_EXACT_MATCH_FACTOR + value: {{ .Values.global.elasticsearch.search.exactMatch.exactFactor | quote }} + - name: ELASTICSEARCH_QUERY_EXACT_MATCH_PREFIX_FACTOR + value: {{ .Values.global.elasticsearch.search.exactMatch.prefixFactor | quote }} + - name: ELASTICSEARCH_QUERY_EXACT_MATCH_CASE_FACTOR + value: {{ .Values.global.elasticsearch.search.exactMatch.caseSensitivityFactor | quote }} + - name: ELASTICSEARCH_QUERY_EXACT_MATCH_ENABLE_STRUCTURED + value: {{ .Values.global.elasticsearch.search.exactMatch.enableStructured | quote }} + - name: ELASTICSEARCH_SEARCH_GRAPH_TIMEOUT_SECONDS + value: {{ .Values.global.elasticsearch.search.graph.timeoutSeconds | quote }} + - name: ELASTICSEARCH_SEARCH_GRAPH_BATCH_SIZE + value: {{ .Values.global.elasticsearch.search.graph.batchSize | quote }} + - name: ELASTICSEARCH_SEARCH_GRAPH_MAX_RESULT + value: {{ .Values.global.elasticsearch.search.graph.maxResult | quote }} + {{- if .Values.global.datahub.cache.search.enabled }} - name: SEARCH_SERVICE_ENABLE_CACHE - value: {{ .Values.global.datahub.enable_search_cache | quote }} + value: {{ .Values.global.datahub.cache.search.enabled | quote }} + - name: 
CACHE_TTL_SECONDS + value: {{ .Values.global.datahub.cache.search.primary.ttlSeconds | quote }} + - name: CACHE_MAX_SIZE + value: {{ .Values.global.datahub.cache.search.primary.maxSize | quote }} + - name: CACHE_ENTITY_COUNTS_TTL_SECONDS + value: {{ .Values.global.datahub.cache.search.homepage.entityCounts.ttlSeconds | quote }} + {{- end }} + {{- if .Values.global.datahub.cache.search.lineage.enabled }} + - name: LINEAGE_SEARCH_CACHE_ENABLED + value: {{ .Values.global.datahub.cache.search.lineage.enabled | quote }} + - name: CACHE_SEARCH_LINEAGE_TTL_SECONDS + value: {{ .Values.global.datahub.cache.search.lineage.ttlSeconds | quote }} + - name: CACHE_SEARCH_LINEAGE_LIGHTNING_THRESHOLD + value: {{ .Values.global.datahub.cache.search.lineage.lightningThreshold | quote }} + {{- end }} {{- with .Values.global.elasticsearch.index.enableMappingsReindex }} - name: ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX value: {{ . | quote }} @@ -285,6 +321,8 @@ spec: {{- end }} - name: ALWAYS_EMIT_CHANGE_LOG value: {{ .Values.global.datahub.alwaysEmitChangeLog | quote }} + - name: GRAPH_SERVICE_DIFF_MODE_ENABLED + value: {{ .Values.global.datahub.enableGraphDiffMode | quote }} {{- with .Values.extraEnvs }} {{- toYaml . | nindent 12 }} {{- end }} diff --git a/charts/datahub/subcharts/datahub-gms/values.yaml b/charts/datahub/subcharts/datahub-gms/values.yaml index 94f9f071d..b50e172a4 100644 --- a/charts/datahub/subcharts/datahub-gms/values.yaml +++ b/charts/datahub/subcharts/datahub-gms/values.yaml @@ -193,11 +193,31 @@ global: enable_retention: false ## Set to true to enable retention on local DB - enable_search_cache: false - ## Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. 
alwaysEmitChangeLog: true + ## Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and re-adding them + enableGraphDiffMode: true + + cache: + search: + ## Enable general search caching + enabled: false + ## Configuration for the primary cache + primary: + ttlSeconds: 600 + maxSize: 10000 + ## Configuration for homepage cache + homepage: + entityCounts: + ttlSeconds: 600 + ## Lineage specific caching options + lineage: + ## Enables in-memory cache for searchAcrossLineage query + enabled: false + ttlSeconds: 86400 + lightningThreshold: 300 + hostAliases: - ip: "192.168.0.104" hostnames: diff --git a/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml b/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml index 1a35c07d9..00237aa88 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.141 +version: 0.2.142 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
appVersion: v0.10.0 diff --git a/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml b/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml index cdbe25909..18d804265 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml @@ -215,6 +215,8 @@ spec: {{- end }} - name: ALWAYS_EMIT_CHANGE_LOG value: {{ .Values.global.datahub.alwaysEmitChangeLog | quote }} + - name: GRAPH_SERVICE_DIFF_MODE_ENABLED + value: {{ .Values.global.datahub.enableGraphDiffMode | quote }} {{- with .Values.extraEnvs }} {{- toYaml . | nindent 12 }} {{- end }} diff --git a/charts/datahub/subcharts/datahub-mae-consumer/values.yaml b/charts/datahub/subcharts/datahub-mae-consumer/values.yaml index e65e923dd..3118918b8 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/values.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/values.yaml @@ -203,6 +203,9 @@ global: ## Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. alwaysEmitChangeLog: true + ## Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and re-adding them + enableGraphDiffMode: true + hostAliases: - ip: "192.168.0.104" hostnames: diff --git a/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml b/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml index 4526b68e7..9963fdc12 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. 
-version: 0.2.143 +version: 0.2.144 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. appVersion: v0.10.0 diff --git a/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml b/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml index 839c0a788..5dc696489 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml @@ -220,6 +220,8 @@ spec: {{- end }} - name: ALWAYS_EMIT_CHANGE_LOG value: {{ .Values.global.datahub.alwaysEmitChangeLog | quote }} + - name: GRAPH_SERVICE_DIFF_MODE_ENABLED + value: {{ .Values.global.datahub.enableGraphDiffMode | quote }} {{- with .Values.extraEnvs }} {{- toYaml . | nindent 12 }} {{- end }} diff --git a/charts/datahub/subcharts/datahub-mce-consumer/values.yaml b/charts/datahub/subcharts/datahub-mce-consumer/values.yaml index c8360da7e..79aa112b2 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/values.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/values.yaml @@ -182,9 +182,6 @@ global: ## The following options control settings for datahub-upgrade job which will ## managed ES indices and other update related work enabled: true - - ## Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. - alwaysEmitChangeLog: true sql: datasource: @@ -196,6 +193,12 @@ global: secretRef: "mysql-secrets" secretKey: "mysql-password" + ## Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. 
+ alwaysEmitChangeLog: true + + ## Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and re-adding them + enableGraphDiffMode: true + hostAliases: - ip: "192.168.0.104" hostnames: diff --git a/charts/datahub/values.yaml b/charts/datahub/values.yaml index 7ce17671b..f201a32bf 100644 --- a/charts/datahub/values.yaml +++ b/charts/datahub/values.yaml @@ -249,6 +249,35 @@ global: ## ensures a complete backup of the original index is preserved. allowDocCountMismatch: false + ## Search related configuration + search: + ## Maximum terms in aggregations + maxTermBucketSize: 20 + + ## Configuration around exact matching for search + exactMatch: + ## if false will only apply weights, if true will exclude non-exact + exclusive: false + ## include prefix exact matches + withPrefix: true + ## boost multiplier when exact with case + exactFactor: 2.0 + ## boost multiplier when exact prefix + prefixFactor: 1.6 + ## stacked boost multiplier when case mismatch + caseSensitivityFactor: 0.7 + ## enable exact match on structured search + enableStructured: true + + ## Configuration for graph service dao + graph: + ## graph dao timeout seconds + timeoutSeconds: 50 + ## graph dao batch size + batchSize: 1000 + ## graph dao max result size + maxResult: 10000 + kafka: bootstrap: server: "prerequisites-kafka:9092" @@ -374,6 +403,9 @@ global: ## Enables always emitting a MCL even when no changes are detected. Used for Time Based Lineage when no changes occur. alwaysEmitChangeLog: true + ## Enables diff mode for graph writes, uses a different code path that produces a diff from previous to next to write relationships instead of wholesale deleting edges and re-adding them + enableGraphDiffMode: true + # hostAliases: # - ip: "192.168.0.104" # hostnames: