|
{{/*
Latency Predictor Env

Renders the env entries for the EPP container when
.Values.inferenceExtension.latencyPredictor.enabled is true:
  - PREDICTION_SERVER_URL: comma-separated localhost URLs, one per prediction
    server sidecar, on consecutive ports starting at predictionServers.startPort.
  - TRAINING_SERVER_URL: localhost URL of the training server sidecar.
  - one entry per key/value pair in latencyPredictor.eppEnv (values quoted).
Renders nothing when the feature is disabled.
*/}}
{{- define "gateway-api-inference-extension.latencyPredictor.env" -}}
{{- if .Values.inferenceExtension.latencyPredictor.enabled }}
{{- $startPort := int .Values.inferenceExtension.latencyPredictor.predictionServers.startPort }}
{{- $urls := list }}
{{- range $i := until (int .Values.inferenceExtension.latencyPredictor.predictionServers.count) }}
{{- $urls = append $urls (printf "http://localhost:%d" (add $startPort $i)) }}
{{- end }}
- name: PREDICTION_SERVER_URL
  value: {{ join "," $urls | quote }}
- name: TRAINING_SERVER_URL
  value: "http://localhost:{{ .Values.inferenceExtension.latencyPredictor.trainingServer.port }}"
{{- range $key, $value := .Values.inferenceExtension.latencyPredictor.eppEnv }}
- name: {{ $key }}
  value: {{ $value | quote }}
{{- end }}
{{- end }}
{{- end }}
| 20 | + |
{{/*
Latency Predictor Sidecar Containers

Renders the training-server sidecar plus predictionServers.count prediction
server sidecars when latencyPredictor.enabled is true. Prediction server i
(0-based) listens on startPort + i; each prediction server is pointed at the
training server via TRAINING_SERVER_URL. Container and volumeMount names must
stay in sync with the volumes helper below. Renders nothing when disabled.
*/}}
{{- define "gateway-api-inference-extension.latencyPredictor.containers" -}}
{{- if .Values.inferenceExtension.latencyPredictor.enabled }}
{{- /* Locals to avoid repeating the long values paths; these lines are
       whitespace-trim-neutral, so rendered output is unchanged. */}}
{{- $lp := .Values.inferenceExtension.latencyPredictor }}
{{- $ts := $lp.trainingServer }}
# Training Server Sidecar Container
- name: training-server
  image: {{ $ts.image.hub }}/{{ $ts.image.name }}:{{ $ts.image.tag }}
  imagePullPolicy: {{ $ts.image.pullPolicy }}
  ports:
  - containerPort: {{ $ts.port }}
    name: training-port
  livenessProbe:
    {{- toYaml $ts.livenessProbe | nindent 4 }}
  readinessProbe:
    {{- toYaml $ts.readinessProbe | nindent 4 }}
  resources:
    {{- toYaml $ts.resources | nindent 4 }}
  envFrom:
  - configMapRef:
      name: {{ include "gateway-api-inference-extension.name" . }}-latency-predictor-training
  env:
  - name: POD_NAME
    valueFrom:
      fieldRef:
        fieldPath: metadata.name
  - name: SERVER_TYPE
    value: "training"
  volumeMounts:
  - name: training-server-storage
    mountPath: /models
{{- $ps := $lp.predictionServers }}
{{- range $i := until (int $ps.count) }}
{{- $ordinal := add $i 1 }}
{{- $port := add $ps.startPort $i }}
# Prediction Server Sidecar Container {{ $ordinal }}
- name: prediction-server-{{ $ordinal }}
  image: {{ $ps.image.hub }}/{{ $ps.image.name }}:{{ $ps.image.tag }}
  imagePullPolicy: {{ $ps.image.pullPolicy }}
  command: ["uvicorn"]
  args: ["prediction_server:app", "--host", "0.0.0.0", "--port", "{{ $port }}"]
  ports:
  - containerPort: {{ $port }}
    {{- /* NOTE(review): Kubernetes port names are limited to 15 chars
           (IANA_SVC_NAME); "predict-port-NN" is valid only up to count = 99 —
           confirm count stays below 100. */}}
    name: predict-port-{{ $ordinal }}
  livenessProbe:
    httpGet:
      path: {{ $ps.livenessProbe.httpGet.path }}
      port: {{ $port }}
    initialDelaySeconds: {{ $ps.livenessProbe.initialDelaySeconds }}
    periodSeconds: {{ $ps.livenessProbe.periodSeconds }}
  readinessProbe:
    httpGet:
      path: {{ $ps.readinessProbe.httpGet.path }}
      port: {{ $port }}
    initialDelaySeconds: {{ $ps.readinessProbe.initialDelaySeconds }}
    periodSeconds: {{ $ps.readinessProbe.periodSeconds }}
    {{- /* NOTE(review): readinessProbe sets failureThreshold but livenessProbe
           does not — verify against the values schema that this asymmetry is
           intentional. */}}
    failureThreshold: {{ $ps.readinessProbe.failureThreshold }}
  resources:
    {{- toYaml $ps.resources | nindent 4 }}
  envFrom:
  - configMapRef:
      name: {{ include "gateway-api-inference-extension.name" $ }}-latency-predictor-prediction
  env:
  - name: PREDICT_PORT
    value: "{{ $port }}"
  - name: POD_NAME
    valueFrom:
      fieldRef:
        fieldPath: metadata.name
  - name: SERVER_TYPE
    value: "prediction-{{ $ordinal }}"
  - name: TRAINING_SERVER_URL
    value: "http://localhost:{{ $ts.port }}"
  volumeMounts:
  - name: prediction-server-{{ $ordinal }}-storage
    mountPath: /server_models
{{- end }}
{{- end }}
{{- end }}
| 97 | + |
{{/*
Latency Predictor Volumes

Renders the emptyDir volumes backing the training server and each of the
predictionServers.count prediction server sidecars. Volume names must match
the volumeMounts declared in the containers helper. Renders nothing when
latencyPredictor.enabled is false.
*/}}
{{- define "gateway-api-inference-extension.latencyPredictor.volumes" -}}
{{- if .Values.inferenceExtension.latencyPredictor.enabled }}
{{- $lp := .Values.inferenceExtension.latencyPredictor }}
- name: training-server-storage
  emptyDir:
    sizeLimit: {{ $lp.trainingServer.volumeSize }}
{{- range $i := until (int $lp.predictionServers.count) }}
- name: prediction-server-{{ add $i 1 }}-storage
  emptyDir:
    sizeLimit: {{ $lp.predictionServers.volumeSize }}
{{- end }}
{{- end }}
{{- end }}