Commit

Merge pull request #114 from fixie-ai/juberti/input-tokens
Use clearer tools prompt and cap LLM names
juberti authored Jul 29, 2024
2 parents 2238046 + 64d8a6a commit 4b5f09e
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions llm_benchmark_suite.py
@@ -354,7 +354,9 @@ def _text_models():
         _TogetherLlm("mistralai/Mixtral-8x7B-Instruct-v0.1", MIXTRAL_8X7B_INSTRUCT),
         # Llama 3.1 405b
         _DatabricksLlm("databricks-meta-llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
-        _DeepInfraLlm("meta-llama/Meta-Llama-3.1-405B-Instruct", LLAMA_31_405B_CHAT_FP8),
+        _DeepInfraLlm(
+            "meta-llama/Meta-Llama-3.1-405B-Instruct", LLAMA_31_405B_CHAT_FP8
+        ),
         _FireworksLlm(
             "accounts/fireworks/models/llama-v3p1-405b-instruct", LLAMA_31_405B_CHAT_FP8
         ),
@@ -459,7 +461,7 @@ def _tools_models():
         _FireworksLlm("accounts/fireworks/models/firefunction-v2", "firefunction-v2"),
         # _FireworksLlm(
         #     "accounts/fireworks/models/llama-v3p1-405b-instruct", LLAMA_31_405B_CHAT_FP8
-        # ), returns "FUNCTION"
+        # ), returns "FUNCTION" and the call as text
         _GroqLlm("llama-3.1-405b-reasoning", LLAMA_31_405B_CHAT_FP8),
         _NvidiaLlm("meta/llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
         _GroqLlm("llama-3.1-70b-versatile", LLAMA_31_70B_CHAT_FP8),
@@ -538,7 +540,7 @@ def _get_prompt(mode: str) -> List[str]:
         return ["@media/text/llama31.md"]
     elif mode == "tools":
         return [
-            "I have a flight booked for July 14, 2024, the flight number is AA100. Can you check its status for me?",
+            "I have a flight booked for July 14, 2024, and the flight number is AA100. Please check its status for me.",
             "--tool",
             "media/tools/flights.json",
         ]
@@ -600,7 +602,7 @@ def _format_response(
         total_time = r.total_time or 0.0
         output = (r.error or r.output).strip().replace("\n", "\\n")
         s += (
-            f"| {r.model:42} | {ttr:4.2f} | {ttft:4.2f} | {tps:3.0f} "
+            f"| {r.model[:42]:42} | {ttr:4.2f} | {ttft:4.2f} | {tps:3.0f} "
            f"| {in_tokens:4} | {out_tokens:3} | {in_time:4.2f} | {out_time:4.2f} "
            f"| {total_time:5.2f} | {output:{dlen}.{dlen}} |\n"
        )
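
Note on the name-capping change above: a Python format spec such as {r.model:42} only pads short names to a minimum field width of 42, it never truncates long ones, so an oversized provider path would overflow the Markdown results table. Slicing with [:42] caps the name before padding. A minimal standalone sketch (not part of the commit; the variable name is ours), reusing a model path that appears in this diff:

    name = "accounts/fireworks/models/llama-v3p1-405b-instruct"  # longer than 42 chars
    print(f"| {name:42} |")       # padded only: the row overflows the 42-char column
    print(f"| {name[:42]:42} |")  # sliced, then padded: the column stays aligned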