From 485b9542d9a0d47198bbb12474df26f822bab08c Mon Sep 17 00:00:00 2001 From: AlanFokCo <892249240@qq.com> Date: Thu, 8 Sep 2022 11:18:12 +0800 Subject: [PATCH 1/2] add podantiaffinity for tfjob ps pod --- charts/tfjob/templates/tfjob.yaml | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/charts/tfjob/templates/tfjob.yaml b/charts/tfjob/templates/tfjob.yaml index f77994381..492259af7 100644 --- a/charts/tfjob/templates/tfjob.yaml +++ b/charts/tfjob/templates/tfjob.yaml @@ -43,6 +43,7 @@ spec: release: {{ .Release.Name }} heritage: {{ .Release.Service }} createdBy: "TFJob" + role: "ps" {{- if .Values.podGroupName }} pod-group.scheduling.sigs.k8s.io/name: {{ .Values.podGroupName }} pod-group.scheduling.sigs.k8s.io/min-available: "{{ .Values.podGroupMinAvailable }}" @@ -108,15 +109,30 @@ spec: operator: In values: - worker - - weight: 30 + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 podAffinityTerm: topologyKey: kubernetes.io/hostname labelSelector: matchExpressions: - - key: tf-replica-type + - key: role operator: In values: - - ps + - ps + {{- else }} + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: kubernetes.io/hostname + labelSelector: + matchExpressions: + - key: role + operator: In + values: + - ps {{- end }} {{- if .Values.useHostNetwork }} {{- if not .Values.useENI }} From cf39257a353b766f83ccac3e0bb879d5462dd806 Mon Sep 17 00:00:00 2001 From: AlanFokCo <892249240@qq.com> Date: Thu, 8 Sep 2022 11:30:18 +0800 Subject: [PATCH 2/2] fix serving chart template --- charts/modeljob/templates/job.yaml | 10 +++++----- charts/seldon-core/templates/seldondeployment.yaml | 8 ++++---- charts/trtserving/templates/deployment.yaml | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/charts/modeljob/templates/job.yaml b/charts/modeljob/templates/job.yaml index c926eea70..869091688 100644 --- a/charts/modeljob/templates/job.yaml +++ b/charts/modeljob/templates/job.yaml @@ -149,12 +149,12 @@ spec: {{- if gt (int $gpuCount) 0}} nvidia.com/gpu: {{ .Values.gpuCount }} {{- end }} - {{ - if gt (int $gpuMemory) 0 }} - aliyun.com/gpu-mem: { { .Values.gpuMemory }} - {{ - end } } - {{ - if gt (int $gpuCore) 0 }} + {{- if gt (int $gpuMemory) 0}} + aliyun.com/gpu-mem: {{ .Values.gpuMemory }} + {{- end }} + {{- if gt (int $gpuCore) 0}} aliyun.com/gpu-core.percentage: {{ .Values.gpuCore }} - {{ - end }} + {{- end }} volumeMounts: {{- if .Values.dataset }} {{- range $pvcName, $mntPath := .Values.dataset}} diff --git a/charts/seldon-core/templates/seldondeployment.yaml b/charts/seldon-core/templates/seldondeployment.yaml index 2a42d431d..1a2105ef8 100644 --- a/charts/seldon-core/templates/seldondeployment.yaml +++ b/charts/seldon-core/templates/seldondeployment.yaml @@ -41,12 +41,12 @@ spec: {{- if gt (int $gpuCount) 0 }} nvidia.com/gpu: {{ .Values.gpuCount }} {{- end }} - {{ - if gt (int $gpuMemory) 0 }} + {{- if gt (int $gpuMemory) 0 }} aliyun.com/gpu-mem: {{ .Values.gpuMemory }} - {{ - end } } - {{ - if gt (int $gpuCore) 0 }} + {{- end }} + {{- if gt (int $gpuCore) 0 }} aliyun.com/gpu-core.percentage: {{ .Values.gpuCore }} - {{ - end }} + {{- end }} graph: implementation: {{ .Values.implementation }} modelUri: {{ .Values.modelUri }} diff --git a/charts/trtserving/templates/deployment.yaml b/charts/trtserving/templates/deployment.yaml index f0bb2c79e..c04c51a15 100755 --- a/charts/trtserving/templates/deployment.yaml +++ b/charts/trtserving/templates/deployment.yaml @@ -98,9 +98,9 @@ spec: {{- if gt (int $gpuMemory) 0}} aliyun.com/gpu-mem: {{ .Values.gpuMemory }} {{- end }} - {{ - if gt (int $gpuCore) 0 }} + {{- if gt (int $gpuCore) 0}} aliyun.com/gpu-core.percentage: {{ .Values.gpuCore }} - {{ - end }} + {{- end }} volumeMounts: {{- if .Values.modelDirs }} {{- range $pvcName, $destPath := .Values.modelDirs}}