Merge remote-tracking branch 'upstream/main' into pr/Cppowboy/718
HuanzhiMao committed Nov 26, 2024
2 parents af2b2bb + 7d3bf66 commit 3c87d3e
Showing 10 changed files with 339 additions and 59 deletions.
7 changes: 7 additions & 0 deletions berkeley-function-call-leaderboard/CHANGELOG.md
@@ -2,6 +2,13 @@
 
 All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file.
 
+- [Nov 25, 2024] [#697](https://github.com/ShishirPatil/gorilla/pull/697): Add the following new models to the leaderboard:
+  - `deepseek-ai/DeepSeek-V2.5`
+  - `deepseek-ai/DeepSeek-Coder-V2-Instruct-0724`
+  - `deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct`
+  - `deepseek-ai/DeepSeek-V2-Chat-0628`
+  - `deepseek-ai/DeepSeek-V2-Lite-Chat`
+- [Nov 25, 2024] [#787](https://github.com/ShishirPatil/gorilla/pull/787): Add new model `Qwen/Qwen2.5-72B-Instruct` to the leaderboard.
 - [Nov 24, 2024] [#743](https://github.com/ShishirPatil/gorilla/pull/743): Add support for regeneration, specific test entry IDs, and custom directory locations:
   - Introduce the `--allow-overwrite` flag for the `generate` command to enable regeneration of test entries even if they already exist.
   - Add a new `--run-ids` flag for the `generate` command, allowing execution of specific test entry IDs from `test_case_ids_to_generate.json`.
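To make the #743 flags above concrete, here is a hedged usage sketch. It assumes the `bfcl` CLI entry point and a `--test-category` flag as documented in the README; the category name, test IDs, and the layout of `test_case_ids_to_generate.json` are illustrative assumptions, not taken from this commit.

```python
# Hedged sketch: regenerate two specific multi-turn entries, overwriting
# any existing results. The flag names come from the changelog entry above;
# the JSON layout and category name are assumptions.
import json
import subprocess

# Illustrative IDs; test_case_ids_to_generate.json is the file named in the changelog.
with open("test_case_ids_to_generate.json", "w") as f:
    json.dump({"multi_turn_base": ["multi_turn_base_0", "multi_turn_base_1"]}, f)

subprocess.run(
    [
        "bfcl", "generate",
        "--model", "deepseek-ai/DeepSeek-V2.5",
        "--test-category", "multi_turn_base",
        "--run-ids",          # read entry IDs from test_case_ids_to_generate.json
        "--allow-overwrite",  # regenerate even if results already exist
    ],
    check=True,
)
```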
4 changes: 3 additions & 1 deletion berkeley-function-call-leaderboard/README.md
@@ -158,7 +158,9 @@ Below is _a table of models we support_ to run our leaderboard evaluation against
 |command-r-plus-FC | Function Calling|
 |command-r-plus | Prompt|
 |databrick-dbrx-instruct | Prompt|
-|deepseek-ai/deepseek-coder-6.7b-instruct 💻| Prompt|
+|deepseek-ai/DeepSeek-V2.5 💻| Function Calling|
+|deepseek-ai/DeepSeek-V2-{Chat-0628,Lite-Chat} 💻| Prompt|
+|deepseek-ai/DeepSeek-Coder-V2-{Instruct-0724,Lite-Instruct} 💻| Function Calling|
 |firefunction-{v1,v2}-FC | Function Calling|
 |gemini-1.0-pro-{001,002}-FC | Function Calling|
 |gemini-1.0-pro-{001,002} | Prompt|
@@ -125,20 +125,18 @@ def collect_test_cases(
         for test_case in all_test_entries_involved
         if test_case["id"] not in existing_ids
     ]
-    test_cases_to_generate = process_multi_turn_test_case(
-        test_cases_to_generate, test_category
-    )
+    test_cases_to_generate = process_multi_turn_test_case(test_cases_to_generate)
 
     return sorted(test_cases_to_generate, key=sort_key)
 
 
-def process_multi_turn_test_case(test_cases, test_category):
+def process_multi_turn_test_case(test_cases):
     """
     Multi-turn test cases don't include the function docs in the prompt. We need to add them here.
     """
-    if not is_multi_turn(test_category):
-        return test_cases
     for entry in test_cases:
+        if not is_multi_turn(entry["id"]):
+            continue
         involved_classes = entry["involved_classes"]
         entry["function"] = []
         for func_collection in involved_classes:
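For clarity, here is a self-contained sketch of what the refactored helper above now does: it filters multi-turn entries per entry ID rather than per test category. The `FUNC_DOC_BY_CLASS` table and the ID-prefix check are illustrative stand-ins for the repo's actual function-doc loading, not its real API.

```python
# Illustrative stand-in for the per-class function docs the real code loads
# from the repo's function-doc files.
FUNC_DOC_BY_CLASS = {
    "GorillaFileSystem": [{"name": "ls", "description": "List directory contents."}],
    "MessageAPI": [{"name": "send_message", "description": "Send a message."}],
}

def is_multi_turn(test_id: str) -> bool:
    # Stand-in: assume multi-turn entry IDs start with "multi_turn".
    return test_id.startswith("multi_turn")

def process_multi_turn_test_case(test_cases: list[dict]) -> list[dict]:
    """Attach function docs to multi-turn entries; leave other entries untouched."""
    for entry in test_cases:
        if not is_multi_turn(entry["id"]):
            continue
        entry["function"] = []
        for class_name in entry["involved_classes"]:
            entry["function"].extend(FUNC_DOC_BY_CLASS[class_name])
    return test_cases

demo = process_multi_turn_test_case(
    [{"id": "multi_turn_base_0", "involved_classes": ["MessageAPI"]}]
)
print(demo[0]["function"])  # -> [{'name': 'send_message', ...}]
```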
@@ -355,11 +355,35 @@
         "OpenAI",
         "Proprietary",
     ],
-    "deepseek-ai/deepseek-coder-6.7b-instruct": [
-        "Deepseek-v1.5 (Prompt)",
-        "https://huggingface.co/deepseek-ai/deepseek-coder-7b-instruct-v1.5",
-        "Deepseek",
-        "Deepseek License",
-    ],
+    "deepseek-ai/DeepSeek-V2.5": [
+        "DeepSeek-V2.5 (FC)",
+        "https://huggingface.co/deepseek-ai/DeepSeek-V2.5",
+        "DeepSeek",
+        "DeepSeek License"
+    ],
+    "deepseek-ai/DeepSeek-Coder-V2-Instruct-0724": [
+        "DeepSeek-Coder-V2 (FC)",
+        "https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct-0724",
+        "DeepSeek",
+        "DeepSeek License"
+    ],
+    "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": [
+        "DeepSeek-Coder-V2-Lite-Instruct (FC)",
+        "https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
+        "DeepSeek",
+        "DeepSeek License"
+    ],
+    "deepseek-ai/DeepSeek-V2-Chat-0628": [
+        "DeepSeek-V2 (Prompt)",
+        "https://huggingface.co/deepseek-ai/DeepSeek-V2-Chat-0628",
+        "DeepSeek",
+        "DeepSeek License",
+    ],
+    "deepseek-ai/DeepSeek-V2-Lite-Chat": [
+        "DeepSeek-V2-Lite (Prompt)",
+        "https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite-Chat",
+        "DeepSeek",
+        "DeepSeek License",
+    ],
     "google/gemma-7b-it": [
         "Gemma-7b-it (Prompt)",
@@ -649,6 +673,12 @@
"Qwen",
"apache-2.0",
],
"Qwen/Qwen2.5-72B-Instruct": [
"Qwen2.5-72B-Instruct (Prompt)",
"https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
"Qwen",
"apache-2.0",
],
"Team-ACE/ToolACE-8B": [
"ToolACE-8B (FC)",
"https://huggingface.co/Team-ACE/ToolACE-8B",
@@ -815,17 +845,4 @@
"meetkai/functionary-medium-v3.1-FC",
"snowflake/arctic",
"nvidia/nemotron-4-340b-instruct",
"ibm-granite/granite-20b-functioncalling",
"THUDM/glm-4-9b-chat",
"Salesforce/xLAM-1b-fc-r",
"Salesforce/xLAM-7b-fc-r",
"Salesforce/xLAM-7b-r",
"Salesforce/xLAM-8x7b-r",
"Salesforce/xLAM-8x22b-r",
"Team-ACE/ToolACE-8B",
"MadeAgents/Hammer2.0-7b",
"MadeAgents/Hammer2.0-3b",
"MadeAgents/Hammer2.0-1.5b",
"MadeAgents/Hammer2.0-0.5b",
"BitAgent/GoGoAgent",
]
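Each entry in the metadata table above is a four-element list: display name, model card URL, organization, and license. A small hedged sketch of consuming that shape follows; the mapping name is an assumption, and only the one entry shown is copied from the diff.

```python
# Assumed name for the mapping edited above; the entry itself is taken from the diff.
MODEL_METADATA_MAPPING: dict[str, list[str]] = {
    "deepseek-ai/DeepSeek-V2.5": [
        "DeepSeek-V2.5 (FC)",                                # display name
        "https://huggingface.co/deepseek-ai/DeepSeek-V2.5",  # model card URL
        "DeepSeek",                                          # organization
        "DeepSeek License",                                  # license
    ],
}

def leaderboard_cell(model_id: str) -> str:
    # Unpack the four positional fields into a markdown-style leaderboard cell.
    display_name, url, org, license_name = MODEL_METADATA_MAPPING[model_id]
    return f"[{display_name}]({url}) ({org}, {license_name})"

print(leaderboard_cell("deepseek-ai/DeepSeek-V2.5"))
```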
@@ -1,4 +1,5 @@
 from bfcl.model_handler.oss_model.deepseek import DeepseekHandler
+from bfcl.model_handler.oss_model.deepseek_coder import DeepseekCoderHandler
 from bfcl.model_handler.oss_model.gemma import GemmaHandler
 from bfcl.model_handler.oss_model.glaive import GlaiveHandler
 from bfcl.model_handler.oss_model.glm import GLMHandler
@@ -7,26 +8,26 @@
 from bfcl.model_handler.oss_model.hermes import HermesHandler
 from bfcl.model_handler.oss_model.llama import LlamaHandler
 from bfcl.model_handler.oss_model.llama_fc import LlamaFCHandler
-from bfcl.model_handler.oss_model.phi import PhiHandler
-from bfcl.model_handler.oss_model.salesforce import SalesforceHandler
-from bfcl.model_handler.oss_model.qwen import QwenHandler
 from bfcl.model_handler.oss_model.minicpm import MiniCPMHandler
 from bfcl.model_handler.oss_model.minicpm_fc import MiniCPMFCHandler
+from bfcl.model_handler.oss_model.phi import PhiHandler
+from bfcl.model_handler.oss_model.qwen import QwenHandler
+from bfcl.model_handler.oss_model.salesforce import SalesforceHandler
 from bfcl.model_handler.proprietary_model.claude import ClaudeHandler
 from bfcl.model_handler.proprietary_model.cohere import CohereHandler
 from bfcl.model_handler.proprietary_model.databricks import DatabricksHandler
 from bfcl.model_handler.proprietary_model.fireworks import FireworksHandler
 from bfcl.model_handler.proprietary_model.functionary import FunctionaryHandler
 from bfcl.model_handler.proprietary_model.gemini import GeminiHandler
+from bfcl.model_handler.proprietary_model.gogoagent import GoGoAgentHandler
 from bfcl.model_handler.proprietary_model.gorilla import GorillaHandler
 from bfcl.model_handler.proprietary_model.mistral import MistralHandler
 from bfcl.model_handler.proprietary_model.nexus import NexusHandler
 from bfcl.model_handler.proprietary_model.nvidia import NvidiaHandler
 from bfcl.model_handler.proprietary_model.openai import OpenAIHandler
 from bfcl.model_handler.proprietary_model.yi import YiHandler
-from bfcl.model_handler.proprietary_model.gogoagent import GoGoAgentHandler
 
-# TODO: Add Deepseek V2, meta-llama/Llama-3.1-405B-Instruct
+# TODO: Add meta-llama/Llama-3.1-405B-Instruct
 
 # Inference through API calls
 api_inference_handler_map = {
@@ -129,9 +130,15 @@
"Qwen/Qwen2-7B-Instruct": QwenHandler,
"Qwen/Qwen2.5-1.5B-Instruct": QwenHandler,
"Qwen/Qwen2.5-7B-Instruct": QwenHandler,
"Qwen/Qwen2.5-72B-Instruct": QwenHandler,
"Team-ACE/ToolACE-8B": LlamaHandler,
"openbmb/MiniCPM3-4B": MiniCPMHandler,
"openbmb/MiniCPM3-4B-FC": MiniCPMFCHandler,
"deepseek-ai/DeepSeek-V2.5": DeepseekCoderHandler,
"deepseek-ai/DeepSeek-Coder-V2-Instruct-0724": DeepseekCoderHandler,
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": DeepseekCoderHandler,
"deepseek-ai/DeepSeek-V2-Chat-0628": DeepseekHandler,
"deepseek-ai/DeepSeek-V2-Lite-Chat": DeepseekHandler,
}

# Deprecated/outdated models, no longer on the leaderboard
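The maps in this file drive handler dispatch: a model name resolves to a handler class, which is then instantiated. Below is a minimal sketch of that lookup with a stub class and stub maps standing in for the real imports; the merged-lookup helper and the local map's name are assumptions, not the repo's actual API (`api_inference_handler_map` does appear in the diff).

```python
class DeepseekHandler:
    """Stub standing in for the imported handler class."""
    def __init__(self, model_name: str, temperature: float) -> None:
        self.model_name = model_name
        self.temperature = temperature

# Tiny stand-ins for the real maps defined in this file.
api_inference_handler_map: dict[str, type] = {}
local_inference_handler_map = {
    "deepseek-ai/DeepSeek-V2-Lite-Chat": DeepseekHandler,
}

def get_handler(model_name: str, temperature: float = 0.001):
    # Look the model up across both maps and instantiate its handler class.
    handler_cls = {**api_inference_handler_map, **local_inference_handler_map}.get(model_name)
    if handler_cls is None:
        raise ValueError(f"Unknown model: {model_name}")
    return handler_cls(model_name, temperature)

handler = get_handler("deepseek-ai/DeepSeek-V2-Lite-Chat")
print(type(handler).__name__)  # -> DeepseekHandler
```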
@@ -52,17 +52,17 @@ def batch_inference(
         backend: str,
         include_input_log: bool,
         include_state_log: bool,
-        overwrite: bool,
+        update_mode: bool,
         result_dir=RESULT_PATH,
     ):
         """
         Batch inference for OSS models.
         """
         from transformers import AutoConfig, AutoTokenizer
 
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_huggingface)
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_huggingface, trust_remote_code=True)
 
-        config = AutoConfig.from_pretrained(self.model_name_huggingface)
+        config = AutoConfig.from_pretrained(self.model_name_huggingface, trust_remote_code=True)
         if hasattr(config, "max_position_embeddings"):
             self.max_context_length = config.max_position_embeddings
         elif self.tokenizer.model_max_length is not None:
@@ -194,10 +194,7 @@ def log_subprocess_output(pipe, stop_event):
             for future in futures:
                 # This will wait for the task to complete, so that we are always writing in order
                 result = future.result()
-                if overwrite:
-                    self.overwrite(result, result_dir)
-                else:
-                    self.write(result, result_dir)
+                self.write(result, result_dir, update_mode=update_mode)
                 pbar.update()


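The hunk above threads `trust_remote_code=True` through both loads (DeepSeek-V2-style checkpoints ship custom modeling code that transformers will only execute with this flag) and keeps the config-then-tokenizer fallback for the context length. Here is a standalone sketch of that probe; the helper name and the model ID are illustrative.

```python
from transformers import AutoConfig, AutoTokenizer

def probe_max_context_length(model_id: str) -> int:
    # trust_remote_code=True allows transformers to run the custom code
    # bundled with checkpoints such as DeepSeek-V2.
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    config = AutoConfig.from_pretrained(model_id, trust_remote_code=True)
    # Prefer the model config; fall back to the tokenizer's own limit.
    if hasattr(config, "max_position_embeddings"):
        return config.max_position_embeddings
    if tokenizer.model_max_length is not None:
        return tokenizer.model_max_length
    raise ValueError(f"Cannot determine max context length for {model_id}")

if __name__ == "__main__":
    print(probe_max_context_length("deepseek-ai/DeepSeek-V2-Lite-Chat"))
```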
@@ -1,11 +1,29 @@
from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler



class DeepseekHandler(OSSHandler):
"""
This is the handler for the Deepseek model. Deepseek-Coder models should use the DeepseekCoderHandler instead.
Note: `deepseek-ai/DeepSeek-V2.5` DO NOT use this handler, but the DeepseekCoderHandler, because it share the same chat template with the DeepSeek-Coder models.
"""
def __init__(self, model_name, temperature) -> None:
super().__init__(model_name, temperature)

def decode_ast(self, result, language="Python"):
result = result.strip()
if result.startswith("```json"):
result = result[len("```json"):]
if result.startswith("```python"):
result = result[len("```python"):]
return super().decode_ast(result, language)

def decode_execute(self, result):
if result.startswith("```json"):
result = result[len("```json"):]
if result.startswith("```python"):
result = result[len("```python"):]
return super().decode_execute(result)

def _format_prompt(self, messages, function):
"""
"bos_token": {
@@ -15,30 +33,50 @@ def _format_prompt(self, messages, function):
"normalized": true,
"rstrip": false,
"single_word": false
}
"chat_template": "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}"
},
"eos_token": {
"__type": "AddedToken",
"content": "<|end▁of▁sentence|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\n\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"
"""

formatted_prompt = "<|begin▁of▁sentence|>"

for message in messages:
formatted_prompt += "\n "
if message["role"] == "system":
formatted_prompt += f"\n{message['content']}\n "
else:
formatted_prompt += "\n "
if message["role"] == "user":
formatted_prompt += (
f"\n### Instruction:\\n{message['content']}\\n\n "
)
else:
formatted_prompt += (
f"\n### Response:\\n{message['content']}\\n<|EOT|>\\n\n "
)
formatted_prompt += "\n "
formatted_prompt += "\n"

formatted_prompt += "\n### Response:\n"
if message["role"] == "user":
formatted_prompt += f"User: {message['content']}\n\n"
elif message["role"] == "assistant":
formatted_prompt += f"Assistant: {message['content']}<|end▁of▁sentence|>"
elif message["role"] == "system":
formatted_prompt += f"{message['content']}\n\n"

formatted_prompt += "Assistant:"

return formatted_prompt

+    def _add_execution_results_prompting(
+        self, inference_data: dict, execution_results: list[str], model_response_data: dict
+    ) -> dict:
+        # Deepseek doesn't accept the tool role, so we use the user role to send back the tool output
+        tool_message = {
+            "role": "user",
+            "content": [],
+        }
+        for execution_result, decoded_model_response in zip(
+            execution_results, model_response_data["model_responses_decoded"]
+        ):
+            tool_message["content"].append(
+                {
+                    "role": "tool",
+                    "name": decoded_model_response,
+                    "content": execution_result,
+                }
+            )
+
+        inference_data["message"].append(tool_message)
+
+        return inference_data
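To make the new chat template concrete, here is a standalone rendering of the format `_format_prompt` produces. The logic is copied from the method above; the sample messages are illustrative.

```python
def format_deepseek_v2_prompt(messages: list[dict]) -> str:
    # Mirrors DeepseekHandler._format_prompt above: system content is inserted
    # bare, user turns get a "User:" prefix, assistant turns get "Assistant:"
    # plus the EOS token, and the prompt ends with a generation cue.
    prompt = "<|begin▁of▁sentence|>"
    for message in messages:
        if message["role"] == "user":
            prompt += f"User: {message['content']}\n\n"
        elif message["role"] == "assistant":
            prompt += f"Assistant: {message['content']}<|end▁of▁sentence|>"
        elif message["role"] == "system":
            prompt += f"{message['content']}\n\n"
    return prompt + "Assistant:"

print(format_deepseek_v2_prompt([
    {"role": "system", "content": "You are a helpful function-calling assistant."},
    {"role": "user", "content": "What's the weather in Berkeley?"},
]))
# <|begin▁of▁sentence|>You are a helpful function-calling assistant.
#
# User: What's the weather in Berkeley?
#
# Assistant:
```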