Fix exposed port in vllm (#19)
* Fix exposed port in vllm
* Add port config support
* fix arg

---------

Signed-off-by: Sanket <sanketsudake@gmail.com>
sanketsudake authored Sep 20, 2024 (1 parent 62a897c, commit 254a6f3)
Showing 3 changed files with 12 additions and 5 deletions.
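
Taken together, the three commits make the vLLM port configurable end to end: the deployment arguments become conditional on `.Values.config`, a `--port` flag is passed through to vLLM, and the container port now tracks `config.port` instead of `service.port`. As a minimal sketch of how the new value could be overridden at install time (the release name and chart path are illustrative, not from this commit):

    # Release name "my-vllm" and local chart path are illustrative.
    # --set-string keeps the port a string, matching the chart's quoted default.
    helm install my-vllm ./charts/vllm --set-string config.port=8080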
charts/vllm/Chart.yaml (2 changes: 1 addition & 1 deletion)

@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
+version: 0.2.0

 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
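Bumping `version` from 0.1.0 to 0.2.0 follows the SemVer guidance quoted above: a new configuration option is a backwards-compatible feature, so the minor version increments. One way to confirm the version a local checkout would package (path illustrative):

    # Prints chart metadata, including version: 0.2.0
    helm show chart ./charts/vllm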
charts/vllm/templates/deployment.yaml (10 changes: 7 additions & 3 deletions)

@@ -42,14 +42,18 @@ spec:
           command: {{ .Values.command }}
           {{- end }}
           args:
-            - --model
+            {{- if .Values.config.model }}
+            - --model
             - {{ .Values.config.model }}
+            {{- end }}
-            - --max-model-len
+            {{- if .Values.config.max_model_len }}
+            - --max-model-len
             - {{ .Values.config.max_model_len | quote }}
+            {{- end }}
+            {{- if .Values.config.port }}
+            - --port
+            - {{ .Values.config.port | quote }}
+            {{- end }}
           {{- if .Values.args }}
           {{- toYaml .Values.args | nindent 12 }}
           {{- end }}
@@ -59,7 +63,7 @@
           {{- end }}
           ports:
             - name: http
-              containerPort: {{ .Values.service.port }}
+              containerPort: {{ .Values.config.port }}
               protocol: TCP
           livenessProbe:
             {{- toYaml .Values.livenessProbe | nindent 12 }}
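With the chart's default values (see values.yaml below), every `{{- if }}` guard is truthy, so the template renders roughly the container snippet sketched here; this is hand-rendered, not output captured from the commit. Note that `max_model_len` and `port` go through `| quote` because Kubernetes requires `args` entries to be strings, while `containerPort` prints the value bare, so YAML parses it as the integer 8000:

    # Hand-rendered sketch of the rendered container spec with default values.
    args:
      - --model
      - bigscience/bloom-560m
      - --max-model-len
      - "4192"
      - --port
      - "8000"
    ports:
      - name: http
        containerPort: 8000
        protocol: TCP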
charts/vllm/values.yaml (5 changes: 4 additions & 1 deletion)

@@ -27,9 +27,12 @@ args: []

 config:
-  # The name of the model to use. Can be a MODEL_ID as listed on <https://hf.co/models> like `gpt2` or `OpenAssistant/oasst-sft-1-pythia-12b`.
+  # If MODEL_ID is set as an environment variable, it takes precedence over this value.
+  # Model ID for the model to be used.
   model: "bigscience/bloom-560m"
   # Model context length
   max_model_len: "4192"
+  # Port to run the service on
+  port: "8000"

 env:
   # Reference: https://docs.vllm.ai/en/latest/models/engine_args.html
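After this change, exposing vLLM on a different port only requires overriding `config.port`, since both the `--port` argument and the `containerPort` derive from it. A sketch of an override, assuming a hypothetical file name and release name:

    # my-values.yaml (file name illustrative): change only config.port;
    # both the --port argument and the containerPort follow it.
    config:
      port: "9000"

    helm upgrade --install my-vllm ./charts/vllm -f my-values.yaml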
