Skip to content

Commit

Permalink
Merge pull request #31 from stackhpc/customisation-options
Browse files Browse the repository at this point in the history
Additional customisation options for Azimuth app
  • Loading branch information
sd109 authored Jul 12, 2024
2 parents 42ed6f6 + 76e0833 commit 311b3a4
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Tiltfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Toggles whether UI should be run locally using gradio hot-reloading
# or should be included in the remote Helm install
# NOTE: os.getenv returns a *string* when the variable is set, and any
# non-empty string (including "false" or "0") is truthy — so compare
# against common falsy spellings instead of relying on truthiness.
# Default (variable unset) remains True, matching previous behavior.
run_ui_locally = str(os.getenv("AZIMUTH_LLM_TILT_LOCAL_UI", True)).lower() not in ("false", "0", "no")

# Allow non-local contexts
allow_k8s_contexts(k8s_context())
Expand Down
15 changes: 15 additions & 0 deletions chart/azimuth-ui.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,25 @@ controls:
type: MirrorControl
path: /huggingface/model
visuallyHidden: true
# Azimuth UI doesn't handle json type ["integer","null"]
# properly so we allow any type in JSON schema then
# constrain to (optional) integer here.
/api/modelMaxContextLength:
type: IntegerControl
minimum: 100
step: 100
required: false

# Top-to-bottom display order of the configurable fields in the
# Azimuth UI form (paths reference keys in values.yaml).
sortOrder:
- /huggingface/model
- /huggingface/token
- /ui/appSettings/hf_model_instruction
- /ui/appSettings/page_title
- /api/image/version
- /ui/appSettings/llm_temperature
- /ui/appSettings/llm_max_tokens
- /ui/appSettings/llm_frequency_penalty
- /ui/appSettings/llm_presence_penalty
- /ui/appSettings/llm_top_p
- /ui/appSettings/llm_top_k
- /api/modelMaxContextLength
4 changes: 4 additions & 0 deletions chart/templates/api/deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ spec:
- --model
- {{ .Values.huggingface.model }}
{{- include "azimuth-llm.chatTemplate" . | nindent 10 }}
{{- /* No trailing chomp here: `-}}` would strip the newline before
the plain YAML `- --max-model-len` line and glue it onto the previous
rendered line, producing invalid YAML. */}}
{{- if .Values.api.modelMaxContextLength }}
- --max-model-len
- {{ .Values.api.modelMaxContextLength | quote }}
{{- end }}
{{- if .Values.api.extraArgs -}}
{{- .Values.api.extraArgs | toYaml | nindent 10 }}
{{- end -}}
Expand Down
20 changes: 20 additions & 0 deletions chart/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,26 @@
"required": ["hf_model_name", "hf_model_instruction"]
}
}
},
"api": {
"type": "object",
"properties": {
"modelMaxContextLength": {
"title": "Model Context Length",
"description": "An override for the maximum context length to allow, if the model's default is not suitable."
},
"image": {
"type": "object",
"properties": {
"version": {
"type": "string",
"title": "Backend vLLM version",
"description": "The vLLM version to use as a backend. Must be a version tag from [this list](https://github.com/vllm-project/vllm/tags)",
"default": "v0.4.3"
}
}
}
}
}
}
}
9 changes: 8 additions & 1 deletion chart/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ api:
iconUrl: https://raw.githubusercontent.com/vllm-project/vllm/v0.2.7/docs/source/assets/logos/vllm-logo-only-light.png
description: |
The raw inference API endpoints for the deployed LLM.
# Config for huggingface model cache volume
# This is mounted at /root/.cache/huggingface in the api deployment
cacheVolume:
hostPath:
path: /tmp/llm/huggingface-cache

# Number of gpus to requests for each api pod instance
# NOTE: This must be in the range 1 <= value <= N, where
# 'N' is the number of GPUs available in a single
Expand All @@ -71,8 +73,13 @@ api:
# to perform a rolling zero-downtime update
updateStrategy:
type: Recreate

# The value of the vLLM backend's max_model_len argument (if the model's default is not suitable)
# https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
# Deliberately null (empty) by default so the flag is omitted and vLLM
# falls back to the model's own maximum context length.
modelMaxContextLength:

# Extra args to supply to the vLLM backend, see
# https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/api_server.py
# https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html#command-line-arguments-for-the-server
extraArgs: []

# Configuration for the frontend web interface
Expand Down

0 comments on commit 311b3a4

Please sign in to comment.