Skip to content

Commit

Permalink
feat(template-mcps): Add configuration for datahub-gc (#508)
Browse files Browse the repository at this point in the history
* Runs garbage collection functions on a schedule
  • Loading branch information
david-leifker authored Oct 7, 2024
1 parent 58648eb commit 37d86c3
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 2 deletions.
2 changes: 1 addition & 1 deletion charts/datahub/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ description: A Helm chart for DataHub
type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
version: 0.4.27
version: 0.4.28
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application.
appVersion: 0.14.1
Expand Down
60 changes: 60 additions & 0 deletions charts/datahub/templates/datahub-upgrade/_upgrade.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,63 @@ Return the env variables for upgrade jobs
value: {{ .datahub_upgrade_history_topic_name }}
{{- end }}
{{- end -}}

{{/*
deepMerge: recursively merges .src into a deep copy of .dst and renders the result as YAML.
Expects a dict with the keys "dst" and "src".
  - keys found only in src are added;
  - when both sides hold a map under the same key, the two maps are merged recursively;
  - in every other case the src value replaces the dst value.
The merge works on a deepCopy of dst, so the caller's dst map is left untouched.
Usage: include "deepMerge" (dict "dst" $base "src" $overrides) | fromYaml
*/}}
{{- define "deepMerge" -}}
  {{- $out := deepCopy .dst -}}
  {{- range $name, $incoming := .src -}}
    {{- /* Default: the incoming value wins; only map-on-map collisions recurse. */ -}}
    {{- $resolved := $incoming -}}
    {{- if and (hasKey $out $name) (kindIs "map" $incoming) -}}
      {{- $existing := index $out $name -}}
      {{- if kindIs "map" $existing -}}
        {{- $resolved = include "deepMerge" (dict "dst" $existing "src" $incoming) | fromYaml -}}
      {{- end -}}
    {{- end -}}
    {{- $_ := set $out $name $resolved -}}
  {{- end -}}
  {{- $out | toYaml -}}
{{- end -}}

{{/*
randomHourInRange: prints an hour (0-23) picked from the inclusive window [start, end].
Expects a two-element list: (list <startHour> <endHour>).
  - start == end     -> that hour is printed as-is;
  - end   <  start   -> the window wraps past midnight (e.g. 18 -> 5 covers 18..23 and 0..5);
  - otherwise        -> the window is start..end.
The offset is derived from the current Unix time, so the printed hour can differ between renders.
*/}}
{{- define "randomHourInRange" -}}
  {{- /*
  Coerce both bounds to int64 before comparing. Go templates refuse to compare an
  integer with a float, so mixing a values.yaml number with a --set number would
  otherwise abort rendering in eq/lt.
  */ -}}
  {{- $start := index . 0 | int64 -}}
  {{- $end := index . 1 | int64 -}}

  {{- if eq $start $end -}}
    {{- $start -}}
  {{- else -}}
    {{- $range := sub $end $start -}}
    {{- if lt $end $start -}}
      {{- /* Range spans midnight */ -}}
      {{- $range = add (sub (int64 24) $start) $end -}}
    {{- end -}}
    {{- /* Seed with the current epoch seconds; add 1 so the end hour itself can be chosen. */ -}}
    {{- $seed := now | unixEpoch -}}
    {{- $randomOffset := mod $seed (add $range 1) -}}
    {{- mod (add $start $randomOffset) 24 -}}
  {{- end -}}
{{- end -}}

{{/*
datahubGC cron daily custom scheduling.
Renders a `schedule.interval` YAML fragment for the datahub-gc template MCP.
  - dailyCronWindow.enabled true: a five-field cron expression with a random minute and an
    hour picked by "randomHourInRange" from dailyCronWindow.startHour/endHour.
  - otherwise: the configured values.schedule.interval is passed through.
Both branches emit the interval as a quoted YAML string.
*/}}
{{- define "datahub.systemUpdate.datahubGC.dailyCronWindow" -}}
{{- $gc := .Values.datahubSystemUpdate.bootstrapMCPs.datahubGC -}}
{{- if $gc.dailyCronWindow.enabled -}}
{{- /* randNumeric yields a two-digit string; mod 60 folds it into a valid minute. */ -}}
{{- $minute := mod (randNumeric 2) 60 -}}
{{- $hour := include "randomHourInRange" (list $gc.dailyCronWindow.startHour $gc.dailyCronWindow.endHour) -}}
schedule:
  interval: {{ printf "%d %s * * *" $minute $hour | quote }}
{{- else }}
schedule:
  interval: {{ $gc.values.schedule.interval | quote }}
{{- end }}
{{- end -}}

{{/*
datahubGC timezone.
Renders a `schedule.timezone` YAML fragment for the datahub-gc template MCP.
global.datahub.timezone is used when it is set; otherwise the recipe's own
values.schedule.timezone is the fallback. The value is emitted as a quoted string.
*/}}
{{- define "datahub.systemUpdate.datahubGC.timezone" -}}
{{- $fallback := .Values.datahubSystemUpdate.bootstrapMCPs.datahubGC.values.schedule.timezone -}}
{{- $tz := default $fallback .Values.global.datahub.timezone -}}
schedule:
  timezone: {{ quote $tz }}
{{- end -}}
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,16 @@ spec:
- name: ELASTICSEARCH_BUILD_INDICES_ALLOW_DOC_COUNT_MISMATCH
value: {{ . | quote }}
{{- end }}
{{- /* One env var per bootstrap MCP: base values overlaid with each generated config, serialised as JSON. */}}
{{- range $mcpName, $mcp := .Values.datahubSystemUpdate.bootstrapMCPs }}
{{- $merged := merge (dict) $mcp.values }}
{{- range $generator := $mcp.values_generated_configs }}
{{- $overrides := include $generator $ | fromYaml }}
{{- $merged = include "deepMerge" (dict "dst" $merged "src" $overrides) | fromYaml }}
{{- end }}
- name: {{ $mcp.values_env }}
  value: {{ $merged | toJson | quote }}
{{- end }}
{{- with .Values.datahubSystemUpdate.extraEnvs }}
{{- toYaml . | nindent 12 }}
{{- end }}
Expand Down
39 changes: 38 additions & 1 deletion charts/datahub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ datahubSystemUpdate:
# steps are completed, the non-blocking job runs while the rest of the
# system is starting.
nonblocking:
enabled: false
enabled: true
# When mode = 'nonblocking' the nonblocking job should not include the above helm.sh/hook annotations
annotations:
# This is what defines this resource as a hook. Without this line, the
Expand All @@ -393,6 +393,40 @@ datahubSystemUpdate:
# to run
# command: customCommand
# args: []
# Depends on v0.14.2 or greater
bootstrapMCPs:
datahubGC:
# For information about this recipe https://datahubproject.io/docs/0.14.0/generated/ingestion/sources/datahubgc/#install-the-plugin
# Overrides values.schedule.interval below with a jitter window using a generated config.
# When enabled, a random minute and a random hour between startHour and endHour
# (inclusive) are chosen each time the chart is rendered; the window may wrap past
# midnight (e.g. startHour 18, endHour 5).
dailyCronWindow:
enabled: true
startHour: 18
endHour: 5
# Dynamic overrides: each entry names a template whose YAML output is deep-merged
# over `values` below; entries are applied in order, so later ones win on conflicts.
values_generated_configs:
- "datahub.systemUpdate.datahubGC.dailyCronWindow"
- "datahub.systemUpdate.datahubGC.timezone"
# Name of the environment variable that receives the merged values as a JSON string
values_env: DATAHUB_GC_BOOTSTRAP_VALUES
# Base values for the template mcp
values:
ingestion:
name: datahub-gc
schedule:
# overridden by global.datahub.timezone when that is set
timezone: "UTC"
# overridden if dailyCronWindow.enabled is true
interval: "0 1 * * *"
cleanup_expired_tokens: false
truncate_indices: true
dataprocess_cleanup:
retention_days: 30
delete_empty_data_jobs: true
delete_empty_data_flows: true
hard_delete_entities: false
keep_last_n: 10
soft_deleted_entities_cleanup:
retention_days: 30
podAnnotations: {}
resources:
limits:
Expand Down Expand Up @@ -700,6 +734,9 @@ global:
port: "8080"
nodePort: "30001"

# Used for scheduled tasks
timezone: "UTC"

frontend:
validateSignUpEmail: true

Expand Down

0 comments on commit 37d86c3

Please sign in to comment.