Remove top_k from vision model UI options
sd109 committed Oct 31, 2024
1 parent 35a1438 commit f3d5544
Showing 4 changed files with 7 additions and 10 deletions.
4 changes: 3 additions & 1 deletion charts/azimuth-image-analysis/azimuth-ui.schema.yaml
@@ -27,5 +27,7 @@ sortOrder:
   - /azimuth-llm/ui/appSettings/llm_params/frequency_penalty
   - /azimuth-llm/ui/appSettings/llm_params/presence_penalty
   - /azimuth-llm/ui/appSettings/llm_params/top_p
-  - /azimuth-llm/ui/appSettings/llm_params/top_k
+  # vLLM responds with HTTP 400 BadRequest when top_k is
+  # passed to a vision model (but ollama accepts it)
+  # - /azimuth-llm/ui/appSettings/llm_params/top_k
   - /azimuth-llm/api/modelMaxContextLength
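
For context, the HTTP 400 behaviour referenced in the new comment can be reproduced against a vLLM OpenAI-compatible server. A minimal sketch, assuming a hypothetical endpoint and vision model (neither comes from this repo); vLLM reads non-standard sampling parameters such as top_k from the client's extra_body:

```python
# Sketch only: demonstrates the HTTP 400 referenced in the comment above.
# The base_url, api_key, and model name are placeholder assumptions.
import openai

client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="dummy")

try:
    client.chat.completions.create(
        model="llava-hf/llava-1.5-7b-hf",  # example vision model
        messages=[{"role": "user", "content": "Describe this image."}],
        # vLLM picks up extra sampling params (e.g. top_k) from extra_body;
        # the commit comment reports this triggers a 400 for vision models.
        extra_body={"top_k": 1},
    )
except openai.BadRequestError as exc:
    print("vLLM rejected top_k:", exc)
```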
1 change: 0 additions & 1 deletion charts/azimuth-image-analysis/ci/ui-only-values.yaml
@@ -11,6 +11,5 @@ azimuth-llm:
       max_tokens: 101
       temperature: 0.1
       top_p: 0.15
-      top_k: 1
       presence_penalty: 0.9
       frequency_penalty: 1
8 changes: 1 addition & 7 deletions charts/azimuth-image-analysis/values.schema.json
@@ -58,6 +58,7 @@
       "description": "Model name supplied to the OpenAI client in frontend web app. Should match huggingface.model above."
     },
     "llm_params": {
+      "$comment": "top_k parameter causes vLLM to error for most (all?) vision models so is excluded here",
       "type": "object",
       "properties": {
         "max_tokens": {
@@ -82,13 +83,6 @@
           "exclusiveMinimum": 0,
           "maximum": 1
         },
-        "top_k": {
-          "type": "integer",
-          "title": "LLM Top K",
-          "description": "The [top k](https://docs.vllm.ai/en/stable/dev/sampling_params.html) value to use when generating LLM responses (must be an integer).",
-          "default": -1,
-          "minimum": -1
-        },
         "presence_penalty": {
           "type": "number",
           "title": "LLM Presence Penalty",
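
As an aside, the `$comment` keyword added above is a standard JSON Schema annotation that validators ignore, so the exclusion is documented without affecting validation. A minimal sketch of validating example llm_params against an abbreviated copy of the schema in this diff (the snippet below is trimmed for illustration, not the full values.schema.json):

```python
# Sketch only: validates sample llm_params against an abbreviated copy of
# the schema above. Requires `pip install jsonschema`.
import jsonschema

llm_params_schema = {
    "$comment": "Abbreviated from the chart's values.schema.json; top_k is absent, matching this commit.",
    "type": "object",
    "properties": {
        "top_p": {"type": "number", "exclusiveMinimum": 0, "maximum": 1},
        "presence_penalty": {"type": "number"},
    },
}

# top_p lies in (0, 1], so this passes without raising ValidationError.
jsonschema.validate({"top_p": 0.15, "presence_penalty": 0.9}, llm_params_schema)
print("llm_params are valid")
```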
4 changes: 3 additions & 1 deletion web-apps/image-analysis/defaults.yml
@@ -18,7 +18,9 @@ llm_params:
   max_tokens:
   temperature:
   top_p:
-  top_k:
+  # vLLM rejects requests with top_k parameter for
+  # most (all?) vision models so can't use it here
+  # top_k:
   frequency_penalty:
   presence_penalty:
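
Since defaults.yml now omits top_k entirely (left commented out) while the other sampling keys are present but unset, a client building its request payload from these defaults would typically drop empty keys before calling the API. A hypothetical sketch; the helper name and dict layout are assumptions, not code from this repo:

```python
# Hypothetical sketch: drop unset (None) sampling params so absent keys
# such as top_k never reach the backend.
from typing import Any, Dict, Optional

# Mirrors web-apps/image-analysis/defaults.yml after this commit: top_k is
# gone and the remaining keys are left unset (None stands in for empty YAML).
defaults: Dict[str, Optional[float]] = {
    "max_tokens": None,
    "temperature": None,
    "top_p": None,
    "frequency_penalty": None,
    "presence_penalty": None,
}

def build_llm_params(params: Dict[str, Any]) -> Dict[str, Any]:
    """Return only the keys that were actually given a value."""
    return {k: v for k, v in params.items() if v is not None}

print(build_llm_params({**defaults, "temperature": 0.1}))
# -> {'temperature': 0.1}
```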

