Additional customisation options for Azimuth app
Adds the ability to customise the vLLM version and the model context length via the Azimuth UI form.
sd109 committed Jul 12, 2024
1 parent bb254a0 commit 5a9e883
Showing 4 changed files with 38 additions and 1 deletion.
chart/azimuth-ui.schema.yaml (7 additions, 0 deletions)
@@ -16,4 +16,11 @@ sortOrder:
- /huggingface/token
- /ui/appSettings/hf_model_instruction
- /ui/appSettings/page_title
- /api/image/version
- /ui/appSettings/llm_temperature
- /ui/appSettings/llm_max_tokens
- /ui/appSettings/llm_frequency_penalty
- /ui/appSettings/llm_presence_penalty
- /ui/appSettings/llm_top_p
- /ui/appSettings/llm_top_k
- /api/modelMaxContextLength
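Each sortOrder entry appears to be a JSON-pointer-style path into the chart's values, so the new entries control where the corresponding settings appear in the generated Azimuth form. As a sketch of the assumed mapping, the /ui/appSettings/* paths point at values keys nested like this (the numbers are placeholders, not defaults), while /api/modelMaxContextLength points at the new api.modelMaxContextLength value introduced further down:

ui:
  appSettings:
    llm_temperature: 0.7   # referenced by /ui/appSettings/llm_temperature
    llm_top_k: 40          # referenced by /ui/appSettings/llm_top_k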
chart/templates/api/deployment.yml (4 additions, 0 deletions)
@@ -29,6 +29,10 @@ spec:
- --model
- {{ .Values.huggingface.model }}
{{- include "azimuth-llm.chatTemplate" . | nindent 10 }}
{{- if .Values.api.modelMaxContextLength -}}
- --max-model-len
- {{ .Values.api.modelMaxContextLength }}
{{- end -}}
{{- if .Values.api.extraArgs -}}
{{- .Values.api.extraArgs | toYaml | nindent 10 }}
{{- end -}}
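As an illustration of the new conditional, if api.modelMaxContextLength were set to, say, 4096, the rendered container args would pick up the extra flag roughly as below (the model name is a placeholder and the chat-template arguments are omitted):

args:
  - --model
  - mistralai/Mistral-7B-Instruct-v0.2   # placeholder model
  - --max-model-len
  - "4096"   # quoted here for readability; the template interpolates the raw value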
chart/values.schema.json (19 additions, 0 deletions)
@@ -92,6 +92,25 @@
"required": ["hf_model_name", "hf_model_instruction"]
}
}
},
"api": {
"properties": {
"modelMaxContextLength": {
"type": "integer",
"title": "Model Context Length",
"description": "An override for the maximum context length to use if the model's default is not suitable."
},
"image": {
"properties": {
"version": {
"type": "string",
"title": "vLLM version override",
"description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)",
"default": "v0.4.3"
}
}
}
}
}
}
}
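A minimal values fragment satisfying the two new schema entries might look like the following (the tag and the context length are illustrative, not recommendations):

api:
  image:
    version: v0.4.3            # must be a published vLLM tag
  modelMaxContextLength: 8192  # only set when the model's default is not suitable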
chart/values.yaml (8 additions, 1 deletion)
@@ -51,11 +51,13 @@ api:
iconUrl: https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
description: |
The raw inference API endpoints for the deployed LLM.
# Config for huggingface model cache volume
# This is mounted at /root/.cache/huggingface in the api deployment
cacheVolume:
hostPath:
path: /tmp/llm/huggingface-cache

# Number of GPUs to request for each api pod instance
# NOTE: This must be in the range 1 <= value <= N, where
# 'N' is the number of GPUs available in a single
@@ -71,8 +73,13 @@
# to perform a rolling zero-downtime update
updateStrategy:
type: Recreate

# The value of the vLLM backend's max_model_len argument (if the model's default is not suitable)
# https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
modelMaxContextLength:

# Extra args to supply to the vLLM backend, see
# https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/api_server.py
# https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
extraArgs: []

# Configuration for the frontend web interface
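For completeness, the dedicated setting is roughly interchangeable with passing the same flag through the pre-existing extraArgs passthrough shown above; two sketches of equivalent overrides (4096 is an illustrative figure):

# Using the new dedicated value
api:
  modelMaxContextLength: 4096
---
# Roughly equivalent via the generic extraArgs passthrough
api:
  extraArgs:
    - --max-model-len
    - "4096"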
