Skip to content

Commit

Permalink
feat(elasticsearch): adding elasticsearch index job (#228)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-leifker authored Jan 3, 2023
1 parent 27639e6 commit 2b637bc
Show file tree
Hide file tree
Showing 9 changed files with 287 additions and 5 deletions.
6 changes: 3 additions & 3 deletions charts/datahub/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,21 @@ description: A Helm chart for LinkedIn DataHub
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
version: 0.2.126
version: 0.2.127
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.
appVersion: 0.9.5
dependencies:
- name: datahub-gms
version: 0.2.121
version: 0.2.122
repository: file://./subcharts/datahub-gms
condition: datahub-gms.enabled
- name: datahub-frontend
version: 0.2.121
repository: file://./subcharts/datahub-frontend
condition: datahub-frontend.enabled
- name: datahub-mae-consumer
version: 0.2.121
version: 0.2.122
repository: file://./subcharts/datahub-mae-consumer
condition: global.datahub_standalone_consumers_enabled
- name: datahub-mce-consumer
Expand Down
2 changes: 1 addition & 1 deletion charts/datahub/subcharts/datahub-gms/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ description: A Helm chart for LinkedIn DataHub's datahub-gms component
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
version: 0.2.121
version: 0.2.122
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.
appVersion: v0.9.3
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ spec:
- name: INDEX_PREFIX
value: {{ . }}
{{- end }}
{{- if .Values.global.elasticsearch.index.upgrade.enabled }}
- name: BUILD_INDICES_HISTORY_KAFKA_CONSUMER_GROUP_ID
value: {{ printf "%s-%s" .Release.Name "bihe-consumer-job-client-gms" }}
{{- end }}
- name: GRAPH_SERVICE_IMPL
value: {{ .Values.global.graph_service_impl }}
{{- if eq .Values.global.graph_service_impl "neo4j" }}
Expand Down
6 changes: 6 additions & 0 deletions charts/datahub/subcharts/datahub-gms/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ global:
port: "9200"
skipcheck: "false"

## Settings for supporting datahub-upgrade job for index creation/reindex
index:
## The following options control settings for datahub-upgrade job when creating or reindexing indices
upgrade:
enabled: true

kafka:
bootstrap:
server: "broker:9092"
Expand Down
2 changes: 1 addition & 1 deletion charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
version: 0.2.121
version: 0.2.122
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.
appVersion: v0.9.3
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ spec:
- name: INDEX_PREFIX
value: {{ . }}
{{- end }}
{{- if .Values.global.elasticsearch.index.upgrade.enabled }}
- name: BUILD_INDICES_HISTORY_KAFKA_CONSUMER_GROUP_ID
value: {{ printf "%s-%s" .Release.Name "bihe-consumer-job-client-mcl" }}
{{- end }}
- name: GRAPH_SERVICE_IMPL
value: {{ .Values.global.graph_service_impl }}
{{- if eq .Values.global.graph_service_impl "neo4j" }}
Expand Down
6 changes: 6 additions & 0 deletions charts/datahub/subcharts/datahub-mae-consumer/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ global:
port: "9200"
skipcheck: "false"

## Settings for supporting datahub-upgrade job for index creation/reindex
index:
## The following options control settings for datahub-upgrade job when creating or reindexing indices
upgrade:
enabled: true

kafka:
bootstrap:
server: "broker:9092"
Expand Down
212 changes: 212 additions & 0 deletions charts/datahub/templates/datahub-upgrade/datahub-build-indices-job.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
{{- /*
Helm hook Job that runs the datahub-upgrade image with the arguments
`-u BuildIndices`.

Rendered only when .Values.datahubUpgradeBuildIndices.enabled is truthy.
Runs as a post-install / post-upgrade hook; any previous hook Job with the
same name is deleted before a new one is created (before-hook-creation).

Values read by this template:
  .Values.datahubUpgradeBuildIndices.*      image, security contexts, resources,
                                            optional extra volumes/envs/init
                                            containers, scheduling constraints
  .Values.global.elasticsearch.*            connection settings and index options
  .Values.global.sql.datasource.*           database connection (overridable via
                                            .Values.sql.datasource.*)
  .Values.global.kafka.*                    bootstrap server and schema registry
  .Values.global.credentialsAndCertsSecrets optional cert volume and secure envs

Every templated scalar that lands in a string-typed Kubernetes field is quoted
so that values which look like numbers or booleans are not re-typed by the
YAML parser.
*/ -}}
{{- if .Values.datahubUpgradeBuildIndices.enabled -}}
apiVersion: batch/v1
kind: Job
metadata:
  name: {{ .Release.Name }}-datahub-build-indices-job
  labels:
    app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
    app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
    helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}"
  annotations:
    # This is what defines this resource as a hook. Without this line, the
    # job is considered part of the release.
    "helm.sh/hook": post-install,post-upgrade
    "helm.sh/hook-weight": "-5"
    "helm.sh/hook-delete-policy": before-hook-creation
spec:
  template:
  {{- /* Pod metadata is emitted only when there is at least one label or annotation to add. */}}
  {{- if or .Values.global.podLabels .Values.datahubUpgradeBuildIndices.podAnnotations}}
    metadata:
    {{- with .Values.datahubUpgradeBuildIndices.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
    {{- end }}
    {{- with .Values.global.podLabels }}
      labels:
        {{- range $key, $value := . }}
        {{ $key }}: {{ $value | quote }}
        {{- end }}
    {{- end }}
  {{- end }}
    spec:
    {{- with .Values.global.hostAliases }}
      hostAliases:
        {{- toYaml . | nindent 8 }}
    {{- end }}
    {{- with .Values.datahubUpgradeBuildIndices.serviceAccount }}
      serviceAccountName: {{ . | quote }}
    {{- end }}
    {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
    {{- end }}
      volumes:
      {{- with .Values.global.credentialsAndCertsSecrets }}
        - name: datahub-certs-dir
          secret:
            defaultMode: 0444
            secretName: {{ .name | quote }}
      {{- end }}
      {{- with .Values.datahubUpgradeBuildIndices.extraVolumes }}
        {{- toYaml . | nindent 8}}
      {{- end }}
      restartPolicy: Never
      securityContext:
        {{- toYaml .Values.datahubUpgradeBuildIndices.podSecurityContext | nindent 8 }}
      initContainers:
      {{- with .Values.datahubUpgradeBuildIndices.extraInitContainers }}
        {{- toYaml . | nindent 12 }}
      {{- end }}
      containers:
        - name: datahub-build-indices-job
          image: "{{ .Values.datahubUpgradeBuildIndices.image.repository }}:{{ required "Global or specific tag is required" (.Values.datahubUpgradeBuildIndices.image.tag | default .Values.global.datahub.version) }}"
          imagePullPolicy: {{ .Values.datahubUpgradeBuildIndices.imagePullPolicy | default "IfNotPresent" }}
          args:
            - "-u"
            - "BuildIndices"
          env:
            {{- include "datahub.upgrade.env" . | nindent 12}}
            - name: DATAHUB_ANALYTICS_ENABLED
              value: {{ .Values.global.datahub_analytics_enabled | quote }}
            - name: ENTITY_REGISTRY_CONFIG_PATH
              value: /datahub/datahub-gms/resources/entity-registry.yml
            - name: EBEAN_DATASOURCE_USERNAME
              value: {{ (.Values.sql).datasource.username | default .Values.global.sql.datasource.username | quote }}
            - name: EBEAN_DATASOURCE_PASSWORD
            {{- /* An inline password value wins; otherwise the password is read from the referenced Secret. */}}
            {{- $passwordValue := (.Values.sql).datasource.password.value | default .Values.global.sql.datasource.password.value }}
            {{- if $passwordValue }}
              value: {{ $passwordValue | quote }}
            {{- else }}
              valueFrom:
                secretKeyRef:
                  name: "{{ (.Values.sql).datasource.password.secretRef | default .Values.global.sql.datasource.password.secretRef }}"
                  key: "{{ (.Values.sql).datasource.password.secretKey | default .Values.global.sql.datasource.password.secretKey }}"
            {{- end }}
            - name: EBEAN_DATASOURCE_HOST
              value: "{{ .Values.global.sql.datasource.host }}"
            - name: EBEAN_DATASOURCE_URL
              value: "{{ .Values.global.sql.datasource.url }}"
            - name: EBEAN_DATASOURCE_DRIVER
              value: "{{ .Values.global.sql.datasource.driver }}"
            - name: KAFKA_BOOTSTRAP_SERVER
              value: "{{ .Values.global.kafka.bootstrap.server }}"
            {{- with .Values.global.kafka.schemaregistry.url }}
            - name: KAFKA_SCHEMAREGISTRY_URL
              value: "{{ . }}"
            {{- end }}
            {{- with .Values.global.kafka.schemaregistry.type }}
            - name: SCHEMA_REGISTRY_TYPE
              value: "{{ . }}"
            {{- end }}
            {{- with .Values.global.kafka.schemaregistry.glue }}
            - name: AWS_GLUE_SCHEMA_REGISTRY_REGION
              value: "{{ .region }}"
            {{- with .registry }}
            - name: AWS_GLUE_SCHEMA_REGISTRY_NAME
              value: "{{ . }}"
            {{- end }}
            {{- end }}
            - name: ELASTICSEARCH_HOST
              value: "{{ .Values.global.elasticsearch.host }}"
            - name: ELASTICSEARCH_PORT
              value: "{{ .Values.global.elasticsearch.port }}"
            - name: SKIP_ELASTICSEARCH_CHECK
              value: "{{ .Values.global.elasticsearch.skipcheck }}"
            {{- with .Values.global.elasticsearch.useSSL }}
            - name: ELASTICSEARCH_USE_SSL
              value: {{ . | quote }}
            {{- end }}
            {{- with .Values.global.elasticsearch.auth }}
            - name: ELASTICSEARCH_USERNAME
              value: {{ .username | quote }}
            - name: ELASTICSEARCH_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: "{{ .password.secretRef }}"
                  key: "{{ .password.secretKey }}"
            {{- end }}
            {{- with .Values.global.elasticsearch.indexPrefix }}
            - name: INDEX_PREFIX
              value: {{ . | quote }}
            {{- end }}
            {{- /*
              Emit the clone flag only when the key is present in values.
              hasKey (rather than `with`) is used so that an explicit `false`
              is still passed through. When the key is absent the variable is
              omitted instead of being set to an empty value.
              NOTE(review): assumes the application has its own default for
              this setting when the variable is unset - confirm.
            */}}
            {{- if hasKey .Values.global.elasticsearch.index.upgrade "cloneIndices" }}
            - name: ELASTICSEARCH_BUILD_INDICES_CLONE_INDICES
              value: {{ .Values.global.elasticsearch.index.upgrade.cloneIndices | quote }}
            {{- end }}
            {{- with .Values.global.elasticsearch.index.enableMappingsReindex }}
            - name: ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX
              value: {{ . | quote }}
            {{- end }}
            {{- with .Values.global.elasticsearch.index.enableSettingsReindex }}
            - name: ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX
              value: {{ . | quote }}
            {{- end }}
            {{- with .Values.global.elasticsearch.index.settingsOverrides }}
            - name: ELASTICSEARCH_INDEX_BUILDER_SETTINGS_OVERRIDES
              value: {{ . | quote }}
            {{- end }}
            {{- with .Values.global.elasticsearch.index.entitySettingsOverrides }}
            - name: ELASTICSEARCH_INDEX_BUILDER_ENTITY_SETTINGS_OVERRIDES
              value: {{ . | quote }}
            {{- end }}
            {{- with .Values.global.elasticsearch.index.refreshIntervalSeconds }}
            - name: ELASTICSEARCH_INDEX_BUILDER_REFRESH_INTERVAL_SECONDS
              value: {{ . | quote }}
            {{- end }}
            - name: GRAPH_SERVICE_IMPL
              value: {{ .Values.global.graph_service_impl | quote }}
            {{- if eq .Values.global.graph_service_impl "neo4j" }}
            - name: NEO4J_HOST
              value: "{{ .Values.global.neo4j.host }}"
            - name: NEO4J_URI
              value: "{{ .Values.global.neo4j.uri }}"
            - name: NEO4J_USERNAME
              value: "{{ .Values.global.neo4j.username }}"
            - name: NEO4J_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: "{{ .Values.global.neo4j.password.secretRef }}"
                  key: "{{ .Values.global.neo4j.password.secretKey }}"
            {{- end }}
            {{- /* Each override key becomes SPRING_KAFKA_PROPERTIES_<KEY>, with dots replaced by underscores and upper-cased. */}}
            {{- if .Values.global.springKafkaConfigurationOverrides }}
            {{- range $configName, $configValue := .Values.global.springKafkaConfigurationOverrides }}
            - name: SPRING_KAFKA_PROPERTIES_{{ $configName | replace "." "_" | upper }}
              value: {{ $configValue | quote }}
            {{- end }}
            {{- end }}
            {{- /* Same naming scheme as above, but each value is read from a key of the credentials Secret. */}}
            {{- if .Values.global.credentialsAndCertsSecrets }}
            {{- range $envVarName, $envVarValue := .Values.global.credentialsAndCertsSecrets.secureEnv }}
            - name: SPRING_KAFKA_PROPERTIES_{{ $envVarName | replace "." "_" | upper }}
              valueFrom:
                secretKeyRef:
                  name: {{ $.Values.global.credentialsAndCertsSecrets.name | quote }}
                  key: {{ $envVarValue | quote }}
            {{- end }}
            {{- end }}
            {{- with .Values.datahubUpgradeBuildIndices.extraEnvs }}
              {{- toYaml . | nindent 12 }}
            {{- end }}
          securityContext:
            {{- toYaml .Values.datahubUpgradeBuildIndices.securityContext | nindent 12 }}
          volumeMounts:
          {{- with .Values.global.credentialsAndCertsSecrets }}
            - name: datahub-certs-dir
              mountPath: {{ .path | default "/mnt/certs" }}
          {{- end }}
          {{- with .Values.datahubUpgradeBuildIndices.extraVolumeMounts }}
            {{- toYaml . | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.datahubUpgradeBuildIndices.resources | nindent 12 }}
      {{- /* Scheduling constraints are pod-level fields, so they sit beside `containers`, not inside the container. */}}
      {{- with .Values.datahubUpgradeBuildIndices.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.datahubUpgradeBuildIndices.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.datahubUpgradeBuildIndices.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
{{- end -}}
50 changes: 50 additions & 0 deletions charts/datahub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,27 @@ datahubUpgrade:
cpu: 300m
memory: 256Mi

## Elasticsearch Indices Creation/Reindex
## ** This feature is currently under development **
## See global.elasticsearch.index for additional configuration
# Values consumed by templates/datahub-upgrade/datahub-build-indices-job.yml
datahubUpgradeBuildIndices:
# The build-indices Job is rendered only when this is true
enabled: false
image:
repository: acryldata/datahub-upgrade
# Image tag for the job; the job template falls back to global.datahub.version
# when this is unset, and rendering fails if neither is provided
# tag:
# Applied as the pod-level securityContext of the job
podSecurityContext: {}
# fsGroup: 1000
# Applied as the securityContext of the datahub-build-indices-job container
securityContext: {}
# runAsUser: 1000
# Extra annotations added to the job's pod template metadata
podAnnotations: {}
# Resource requests/limits for the datahub-build-indices-job container
resources:
limits:
cpu: 500m
memory: 512Mi
requests:
cpu: 300m
memory: 256Mi

global:
graph_service_impl: neo4j
datahub_analytics_enabled: true
Expand All @@ -168,6 +189,35 @@ global:
insecure: "false"
useSSL: "false"

## The following section controls when and how reindexing of elasticsearch indices is performed
index:
## Enable reindexing when mappings change based on the data model annotations
enableMappingsReindex: false

## Enable reindexing when static index settings change.
## Dynamic settings which do not require reindexing are not affected
## Primarily this should be enabled when re-sharding is necessary for scaling/performance.
enableSettingsReindex: false

## Index settings can be overridden for entity indices or other indices on an index by index basis
## Some index settings, such as the number of shards, require reindexing, while others, e.g. replicas, do not
## Non-Entity indices do not require the prefix
# settingsOverrides: '{"graph_service_v1":{"number_of_shards":"5"},"system_metadata_service_v1":{"number_of_shards":"5"}}'
## Entity indices do not require the prefix or suffix
# entitySettingsOverrides: '{"dataset":{"number_of_shards":"10"}}'

## The amount of delay between indexing a document and having it returned in queries
## Increasing this value can improve performance when ingesting large amounts of data
# refreshIntervalSeconds: 1

## The following options control settings for datahub-upgrade job when creating or reindexing indices
upgrade:
enabled: true

## When reindexing is required, this option will clone the existing index as a backup
## The clone indices are not currently managed
# cloneIndices: true

kafka:
bootstrap:
server: "prerequisites-kafka:9092"
Expand Down

0 comments on commit 2b637bc

Please sign in to comment.