Fix exposed port in vllm (#19)
* Fix exposed port in vllm
* Add port config support
* fix arg

---------

Signed-off-by: Sanket <sanketsudake@gmail.com>
sanketsudake authored Sep 20, 2024 (1 parent 62a897c, commit 254a6f3)
Showing 3 changed files with 12 additions and 5 deletions.
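
Taken together, the three commits make the vLLM port configurable end to end: the deployment arguments become conditional on `.Values.config`, a `--port` flag is passed through to vLLM, and the container port now tracks `config.port` instead of `service.port`. As a minimal sketch of how the new value could be overridden at install time (the release name and chart path are illustrative, not from this commit):

    # Release name "my-vllm" and local chart path are illustrative.
    # --set-string keeps the port a string, matching the chart's quoted default.
    helm install my-vllm ./charts/vllm --set-string config.port=8080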
charts/vllm/Chart.yaml (2 changes: 1 addition & 1 deletion)

@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.0
+version: 0.2.0

 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to
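Bumping `version` from 0.1.0 to 0.2.0 follows the SemVer guidance quoted above: a new configuration option is a backwards-compatible feature, so the minor version increments. One way to confirm the version a local checkout would package (path illustrative):

    # Prints chart metadata, including version: 0.2.0
    helm show chart ./charts/vllm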
charts/vllm/templates/deployment.yaml (10 changes: 7 additions & 3 deletions)

@@ -42,14 +42,18 @@ spec:
           command: {{ .Values.command }}
           {{- end }}
           args:
-            - --model
+            {{- if .Values.config.model }}
+            - --model
             - {{ .Values.config.model }}
+            {{- end }}
-            - --max-model-len
+            {{- if .Values.config.max_model_len }}
+            - --max-model-len
             - {{ .Values.config.max_model_len | quote }}
+            {{- end }}
+            {{- if .Values.config.port }}
+            - --port
+            - {{ .Values.config.port | quote }}
+            {{- end }}
           {{- if .Values.args }}
           {{- toYaml .Values.args | nindent 12 }}
           {{- end }}
@@ -59,7 +63,7 @@
           {{- end }}
           ports:
             - name: http
-              containerPort: {{ .Values.service.port }}
+              containerPort: {{ .Values.config.port }}
               protocol: TCP
           livenessProbe:
             {{- toYaml .Values.livenessProbe | nindent 12 }}
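With the chart's default values (see values.yaml below), every `{{- if }}` guard is truthy, so the template renders roughly the container snippet sketched here; this is hand-rendered, not output captured from the commit. Note that `max_model_len` and `port` go through `| quote` because Kubernetes requires `args` entries to be strings, while `containerPort` prints the value bare, so YAML parses it as the integer 8000:

    # Hand-rendered sketch of the rendered container spec with default values.
    args:
      - --model
      - bigscience/bloom-560m
      - --max-model-len
      - "4192"
      - --port
      - "8000"
    ports:
      - name: http
        containerPort: 8000
        protocol: TCP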
charts/vllm/values.yaml (5 changes: 4 additions & 1 deletion)

@@ -27,9 +27,12 @@ args: []

 config:
-  # The name of the model to use. Can be a MODEL_ID as listed on <https://hf.co/models> like `gpt2` or `OpenAssistant/oasst-sft-1-pythia-12b`.
+  # If MODEL_ID is set as an environment variable, it takes precedence over this value.
+  # Model ID for the model to be used.
   model: "bigscience/bloom-560m"
   # Model context length
   max_model_len: "4192"
+  # Port to run the service on
+  port: "8000"

 env:
   # Reference: https://docs.vllm.ai/en/latest/models/engine_args.html
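After this change, exposing vLLM on a different port only requires overriding `config.port`, since both the `--port` argument and the `containerPort` derive from it. A sketch of an override, assuming a hypothetical file name and release name:

    # my-values.yaml (file name illustrative): change only config.port;
    # both the --port argument and the containerPort follow it.
    config:
      port: "9000"

    helm upgrade --install my-vllm ./charts/vllm -f my-values.yaml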
