Commit

Merge pull request #114 from fixie-ai/juberti/input-tokens
Use clearer tools prompt and cap LLM names
juberti authored Jul 29, 2024
2 parents 2238046 + 64d8a6a commit 4b5f09e
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions llm_benchmark_suite.py
@@ -354,7 +354,9 @@ def _text_models():
         _TogetherLlm("mistralai/Mixtral-8x7B-Instruct-v0.1", MIXTRAL_8X7B_INSTRUCT),
         # Llama 3.1 405b
         _DatabricksLlm("databricks-meta-llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
-        _DeepInfraLlm("meta-llama/Meta-Llama-3.1-405B-Instruct", LLAMA_31_405B_CHAT_FP8),
+        _DeepInfraLlm(
+            "meta-llama/Meta-Llama-3.1-405B-Instruct", LLAMA_31_405B_CHAT_FP8
+        ),
         _FireworksLlm(
             "accounts/fireworks/models/llama-v3p1-405b-instruct", LLAMA_31_405B_CHAT_FP8
         ),
@@ -459,7 +461,7 @@ def _tools_models():
         _FireworksLlm("accounts/fireworks/models/firefunction-v2", "firefunction-v2"),
         # _FireworksLlm(
         #     "accounts/fireworks/models/llama-v3p1-405b-instruct", LLAMA_31_405B_CHAT_FP8
-        # ), returns "FUNCTION"
+        # ), returns "FUNCTION" and the call as text
         _GroqLlm("llama-3.1-405b-reasoning", LLAMA_31_405B_CHAT_FP8),
         _NvidiaLlm("meta/llama-3.1-405b-instruct", LLAMA_31_405B_CHAT),
         _GroqLlm("llama-3.1-70b-versatile", LLAMA_31_70B_CHAT_FP8),
@@ -538,7 +540,7 @@ def _get_prompt(mode: str) -> List[str]:
         return ["@media/text/llama31.md"]
     elif mode == "tools":
         return [
-            "I have a flight booked for July 14, 2024, the flight number is AA100. Can you check its status for me?",
+            "I have a flight booked for July 14, 2024, and the flight number is AA100. Please check its status for me.",
             "--tool",
             "media/tools/flights.json",
         ]
@@ -600,7 +602,7 @@ def _format_response(
         total_time = r.total_time or 0.0
         output = (r.error or r.output).strip().replace("\n", "\\n")
         s += (
-            f"| {r.model:42} | {ttr:4.2f} | {ttft:4.2f} | {tps:3.0f} "
+            f"| {r.model[:42]:42} | {ttr:4.2f} | {ttft:4.2f} | {tps:3.0f} "
            f"| {in_tokens:4} | {out_tokens:3} | {in_time:4.2f} | {out_time:4.2f} "
            f"| {total_time:5.2f} | {output:{dlen}.{dlen}} |\n"
        )
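
Note on the name-capping change above: a Python format spec such as {r.model:42} only pads short names to a minimum field width of 42, it never truncates long ones, so an oversized provider path would overflow the Markdown results table. Slicing with [:42] caps the name before padding. A minimal standalone sketch (not part of the commit; the variable name is ours), reusing a model path that appears in this diff:

    name = "accounts/fireworks/models/llama-v3p1-405b-instruct"  # longer than 42 chars
    print(f"| {name:42} |")       # padded only: the row overflows the 42-char column
    print(f"| {name[:42]:42} |")  # sliced, then padded: the column stays aligned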