From 12df1a3b2a5df23adacfd9a64ca6df6f7b105d3c Mon Sep 17 00:00:00 2001 From: Simon Willison Date: Tue, 5 Nov 2024 22:49:26 -0800 Subject: [PATCH] Show attachment types in llm models --options, closes #612 --- docs/aliases.md | 4 +- docs/openai-models.md | 6 +- docs/usage.md | 358 ++++++++++++++------------- llm/cli.py | 29 ++- llm/default_plugins/openai_models.py | 9 +- tests/test_llm.py | 59 +++-- 6 files changed, 250 insertions(+), 215 deletions(-) diff --git a/docs/aliases.md b/docs/aliases.md index fe86288e..787a3034 100644 --- a/docs/aliases.md +++ b/docs/aliases.md @@ -19,6 +19,8 @@ result = CliRunner().invoke(cli, ["aliases", "list"]) cog.out("```\n{}```".format(result.output)) ]]] --> ``` +4o : gpt-4o +4o-mini : gpt-4o-mini 3.5 : gpt-3.5-turbo chatgpt : gpt-3.5-turbo chatgpt-16k : gpt-3.5-turbo-16k @@ -29,8 +31,6 @@ gpt4 : gpt-4 gpt-4-turbo-preview : gpt-4-turbo 4-turbo : gpt-4-turbo 4t : gpt-4-turbo -4o : gpt-4o -4o-mini : gpt-4o-mini 3.5-instruct : gpt-3.5-turbo-instruct chatgpt-instruct : gpt-3.5-turbo-instruct ada : ada-002 (embedding) diff --git a/docs/openai-models.md b/docs/openai-models.md index e1d90083..eda613ad 100644 --- a/docs/openai-models.md +++ b/docs/openai-models.md @@ -31,6 +31,9 @@ models = [line for line in result.output.split("\n") if line.startswith("OpenAI cog.out("```\n{}\n```".format("\n".join(models))) ]]] --> ``` +OpenAI Chat: gpt-4o (aliases: 4o) +OpenAI Chat: gpt-4o-mini (aliases: 4o-mini) +OpenAI Chat: gpt-4o-audio-preview OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt) OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k) OpenAI Chat: gpt-4 (aliases: 4, gpt4) @@ -39,9 +42,6 @@ OpenAI Chat: gpt-4-1106-preview OpenAI Chat: gpt-4-0125-preview OpenAI Chat: gpt-4-turbo-2024-04-09 OpenAI Chat: gpt-4-turbo (aliases: gpt-4-turbo-preview, 4-turbo, 4t) -OpenAI Chat: gpt-4o (aliases: 4o) -OpenAI Chat: gpt-4o-mini (aliases: 4o-mini) -OpenAI Chat: gpt-4o-audio-preview OpenAI Chat: o1-preview OpenAI Chat: o1-mini OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct) diff --git a/docs/usage.md b/docs/usage.md index ea15e453..f5d80d11 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -247,186 +247,206 @@ result = CliRunner().invoke(cli, ["models", "list", "--options"]) cog.out("```\n{}\n```".format(result.output)) ]]] --> ``` +OpenAI Chat: gpt-4o (aliases: 4o) + Options: + temperature: float + What sampling temperature to use, between 0 and 2. Higher values like + 0.8 will make the output more random, while lower values like 0.2 will + make it more focused and deterministic. + max_tokens: int + Maximum number of tokens to generate. + top_p: float + An alternative to sampling with temperature, called nucleus sampling, + where the model considers the results of the tokens with top_p + probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. Recommended to use top_p or + temperature but not both. + frequency_penalty: float + Number between -2.0 and 2.0. Positive values penalize new tokens based + on their existing frequency in the text so far, decreasing the model's + likelihood to repeat the same line verbatim. + presence_penalty: float + Number between -2.0 and 2.0. Positive values penalize new tokens based + on whether they appear in the text so far, increasing the model's + likelihood to talk about new topics. + stop: str + A string where the API will stop generating further tokens. 
+ logit_bias: dict, str + Modify the likelihood of specified tokens appearing in the completion. + Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}' + seed: int + Integer seed to attempt to sample deterministically + json_object: boolean + Output a valid JSON object {...}. Prompt must mention JSON. + Attachment types: + image/png, image/gif, image/webp, image/jpeg +OpenAI Chat: gpt-4o-mini (aliases: 4o-mini) + Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean + Attachment types: + image/png, image/gif, image/webp, image/jpeg +OpenAI Chat: gpt-4o-audio-preview + Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean + Attachment types: + audio/mpeg, audio/wave OpenAI Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt) - temperature: float - What sampling temperature to use, between 0 and 2. Higher values like - 0.8 will make the output more random, while lower values like 0.2 will - make it more focused and deterministic. - max_tokens: int - Maximum number of tokens to generate. - top_p: float - An alternative to sampling with temperature, called nucleus sampling, - where the model considers the results of the tokens with top_p - probability mass. So 0.1 means only the tokens comprising the top 10% - probability mass are considered. Recommended to use top_p or - temperature but not both. - frequency_penalty: float - Number between -2.0 and 2.0. Positive values penalize new tokens based - on their existing frequency in the text so far, decreasing the model's - likelihood to repeat the same line verbatim. - presence_penalty: float - Number between -2.0 and 2.0. Positive values penalize new tokens based - on whether they appear in the text so far, increasing the model's - likelihood to talk about new topics. - stop: str - A string where the API will stop generating further tokens. - logit_bias: dict, str - Modify the likelihood of specified tokens appearing in the completion. - Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}' - seed: int - Integer seed to attempt to sample deterministically - json_object: boolean - Output a valid JSON object {...}. Prompt must mention JSON. 
+ Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean OpenAI Chat: gpt-3.5-turbo-16k (aliases: chatgpt-16k, 3.5-16k) - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean + Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean OpenAI Chat: gpt-4 (aliases: 4, gpt4) - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean + Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean OpenAI Chat: gpt-4-32k (aliases: 4-32k) - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean + Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean OpenAI Chat: gpt-4-1106-preview - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean + Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean OpenAI Chat: gpt-4-0125-preview - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean + Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean OpenAI Chat: gpt-4-turbo-2024-04-09 - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean + Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean OpenAI Chat: gpt-4-turbo (aliases: gpt-4-turbo-preview, 4-turbo, 4t) - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean -OpenAI Chat: gpt-4o (aliases: 4o) - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean -OpenAI Chat: gpt-4o-mini (aliases: 4o-mini) - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean -OpenAI Chat: gpt-4o-audio-preview - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean + Options: + temperature: 
float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean OpenAI Chat: o1-preview - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean + Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean OpenAI Chat: o1-mini - temperature: float - max_tokens: int - top_p: float - frequency_penalty: float - presence_penalty: float - stop: str - logit_bias: dict, str - seed: int - json_object: boolean + Options: + temperature: float + max_tokens: int + top_p: float + frequency_penalty: float + presence_penalty: float + stop: str + logit_bias: dict, str + seed: int + json_object: boolean OpenAI Completion: gpt-3.5-turbo-instruct (aliases: 3.5-instruct, chatgpt-instruct) - temperature: float - What sampling temperature to use, between 0 and 2. Higher values like - 0.8 will make the output more random, while lower values like 0.2 will - make it more focused and deterministic. - max_tokens: int - Maximum number of tokens to generate. - top_p: float - An alternative to sampling with temperature, called nucleus sampling, - where the model considers the results of the tokens with top_p - probability mass. So 0.1 means only the tokens comprising the top 10% - probability mass are considered. Recommended to use top_p or - temperature but not both. - frequency_penalty: float - Number between -2.0 and 2.0. Positive values penalize new tokens based - on their existing frequency in the text so far, decreasing the model's - likelihood to repeat the same line verbatim. - presence_penalty: float - Number between -2.0 and 2.0. Positive values penalize new tokens based - on whether they appear in the text so far, increasing the model's - likelihood to talk about new topics. - stop: str - A string where the API will stop generating further tokens. - logit_bias: dict, str - Modify the likelihood of specified tokens appearing in the completion. - Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}' - seed: int - Integer seed to attempt to sample deterministically - logprobs: int - Include the log probabilities of most likely N per token + Options: + temperature: float + What sampling temperature to use, between 0 and 2. Higher values like + 0.8 will make the output more random, while lower values like 0.2 will + make it more focused and deterministic. + max_tokens: int + Maximum number of tokens to generate. + top_p: float + An alternative to sampling with temperature, called nucleus sampling, + where the model considers the results of the tokens with top_p + probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. Recommended to use top_p or + temperature but not both. + frequency_penalty: float + Number between -2.0 and 2.0. Positive values penalize new tokens based + on their existing frequency in the text so far, decreasing the model's + likelihood to repeat the same line verbatim. + presence_penalty: float + Number between -2.0 and 2.0. Positive values penalize new tokens based + on whether they appear in the text so far, increasing the model's + likelihood to talk about new topics. + stop: str + A string where the API will stop generating further tokens. 
+ logit_bias: dict, str + Modify the likelihood of specified tokens appearing in the completion. + Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}' + seed: int + Integer seed to attempt to sample deterministically + logprobs: int + Include the log probabilities of most likely N per token ``` diff --git a/llm/cli.py b/llm/cli.py index d4547199..3506b877 100644 --- a/llm/cli.py +++ b/llm/cli.py @@ -961,11 +961,11 @@ def models_list(options): extra = "" if model_with_aliases.aliases: extra = " (aliases: {})".format(", ".join(model_with_aliases.aliases)) - output = str(model_with_aliases.model) + extra - if options and model_with_aliases.model.Options.schema()["properties"]: - for name, field in model_with_aliases.model.Options.schema()[ - "properties" - ].items(): + model = model_with_aliases.model + output = str(model) + extra + if options and model.Options.model_json_schema()["properties"]: + output += "\n Options:" + for name, field in model.Options.model_json_schema()["properties"].items(): any_of = field.get("anyOf") if any_of is None: any_of = [{"type": field["type"]}] @@ -976,17 +976,24 @@ def models_list(options): if item["type"] != "null" ] ) - bits = ["\n ", name, ": ", types] + bits = ["\n ", name, ": ", types] description = field.get("description", "") if description and ( - model_with_aliases.model.__class__ - not in models_that_have_shown_options + model.__class__ not in models_that_have_shown_options ): wrapped = textwrap.wrap(description, 70) - bits.append("\n ") - bits.extend("\n ".join(wrapped)) + bits.append("\n ") + bits.extend("\n ".join(wrapped)) output += "".join(bits) - models_that_have_shown_options.add(model_with_aliases.model.__class__) + models_that_have_shown_options.add(model.__class__) + if options and model.attachment_types: + attachment_types = ", ".join(sorted(model.attachment_types)) + wrapper = textwrap.TextWrapper( + width=min(max(shutil.get_terminal_size().columns, 30), 70), + initial_indent=" ", + subsequent_indent=" ", + ) + output += "\n Attachment types:\n{}".format(wrapper.fill(attachment_types)) click.echo(output) diff --git a/llm/default_plugins/openai_models.py b/llm/default_plugins/openai_models.py index 6944df6c..4229553c 100644 --- a/llm/default_plugins/openai_models.py +++ b/llm/default_plugins/openai_models.py @@ -23,6 +23,11 @@ @hookimpl def register_models(register): + # GPT-4o + register(Chat("gpt-4o", vision=True), aliases=("4o",)) + register(Chat("gpt-4o-mini", vision=True), aliases=("4o-mini",)) + register(Chat("gpt-4o-audio-preview", audio=True)) + # 3.5 and 4 register(Chat("gpt-3.5-turbo"), aliases=("3.5", "chatgpt")) register(Chat("gpt-3.5-turbo-16k"), aliases=("chatgpt-16k", "3.5-16k")) register(Chat("gpt-4"), aliases=("4", "gpt4")) @@ -32,10 +37,6 @@ def register_models(register): register(Chat("gpt-4-0125-preview")) register(Chat("gpt-4-turbo-2024-04-09")) register(Chat("gpt-4-turbo"), aliases=("gpt-4-turbo-preview", "4-turbo", "4t")) - # GPT-4o - register(Chat("gpt-4o", vision=True), aliases=("4o",)) - register(Chat("gpt-4o-mini", vision=True), aliases=("4o-mini",)) - register(Chat("gpt-4o-audio-preview", audio=True)) # o1 register(Chat("o1-preview", can_stream=False, allows_system_prompt=False)) register(Chat("o1-mini", can_stream=False, allows_system_prompt=False)) diff --git a/tests/test_llm.py b/tests/test_llm.py index c303061d..a0058713 100644 --- a/tests/test_llm.py +++ b/tests/test_llm.py @@ -514,32 +514,39 @@ def test_openai_localai_configuration(mocked_localai, user_path): EXPECTED_OPTIONS = """ -OpenAI 
Chat: gpt-3.5-turbo (aliases: 3.5, chatgpt) - temperature: float - What sampling temperature to use, between 0 and 2. Higher values like - 0.8 will make the output more random, while lower values like 0.2 will - make it more focused and deterministic. - max_tokens: int - Maximum number of tokens to generate. - top_p: float - An alternative to sampling with temperature, called nucleus sampling, - where the model considers the results of the tokens with top_p - probability mass. So 0.1 means only the tokens comprising the top 10% - probability mass are considered. Recommended to use top_p or - temperature but not both. - frequency_penalty: float - Number between -2.0 and 2.0. Positive values penalize new tokens based - on their existing frequency in the text so far, decreasing the model's - likelihood to repeat the same line verbatim. - presence_penalty: float - Number between -2.0 and 2.0. Positive values penalize new tokens based - on whether they appear in the text so far, increasing the model's - likelihood to talk about new topics. - stop: str - A string where the API will stop generating further tokens. - logit_bias: dict, str - Modify the likelihood of specified tokens appearing in the completion. - Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}' +OpenAI Chat: gpt-4o (aliases: 4o) + Options: + temperature: float + What sampling temperature to use, between 0 and 2. Higher values like + 0.8 will make the output more random, while lower values like 0.2 will + make it more focused and deterministic. + max_tokens: int + Maximum number of tokens to generate. + top_p: float + An alternative to sampling with temperature, called nucleus sampling, + where the model considers the results of the tokens with top_p + probability mass. So 0.1 means only the tokens comprising the top 10% + probability mass are considered. Recommended to use top_p or + temperature but not both. + frequency_penalty: float + Number between -2.0 and 2.0. Positive values penalize new tokens based + on their existing frequency in the text so far, decreasing the model's + likelihood to repeat the same line verbatim. + presence_penalty: float + Number between -2.0 and 2.0. Positive values penalize new tokens based + on whether they appear in the text so far, increasing the model's + likelihood to talk about new topics. + stop: str + A string where the API will stop generating further tokens. + logit_bias: dict, str + Modify the likelihood of specified tokens appearing in the completion. + Pass a JSON string like '{"1712":-100, "892":-100, "1489":-100}' + seed: int + Integer seed to attempt to sample deterministically + json_object: boolean + Output a valid JSON object {...}. Prompt must mention JSON. + Attachment types: + image/gif, image/jpeg, image/png, image/webp """
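
For context on the llm/cli.py hunk above, here is a minimal standalone sketch of the attachment-type formatting that models_list() gains in this patch. The format_attachment_types helper name and the example MIME-type set are illustrative only (not part of the patch); the sorting, width clamp, and indentation mirror the diff.

import shutil
import textwrap


def format_attachment_types(attachment_types):
    # Sort the MIME types for stable output, then wrap them to the terminal
    # width (clamped between 30 and 70 columns) with a four-space indent so
    # they sit under the "  Attachment types:" header, as in llm/cli.py.
    joined = ", ".join(sorted(attachment_types))
    wrapper = textwrap.TextWrapper(
        width=min(max(shutil.get_terminal_size().columns, 30), 70),
        initial_indent="    ",
        subsequent_indent="    ",
    )
    return "  Attachment types:\n{}".format(wrapper.fill(joined))


if __name__ == "__main__":
    # Hypothetical invocation; gpt-4o in this patch registers the four
    # image types shown in the docs/usage.md output above.
    print(format_attachment_types({"image/png", "image/gif", "image/webp", "image/jpeg"}))

Sorting the types is what lets the test assert a fixed order (image/gif, image/jpeg, image/png, image/webp) regardless of how the model declares them.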