diff --git a/.gitignore b/.gitignore
index 96b53ca2e..067e81903 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,9 +19,6 @@ goex/docker/misc/images.json
 
 ################## Berkley Function Call Leaderboard ##########################
 
-# Ignore API keys
-berkeley-function-call-leaderboard/function_credential_config.json
-
 # Ignore tree-sitter
 berkeley-function-call-leaderboard/eval_checker/tree-sitter-java
 berkeley-function-call-leaderboard/eval_checker/tree-sitter-javascript
diff --git a/berkeley-function-call-leaderboard/README.md b/berkeley-function-call-leaderboard/README.md
index ef84d6fd0..85a905fcf 100644
--- a/berkeley-function-call-leaderboard/README.md
+++ b/berkeley-function-call-leaderboard/README.md
@@ -138,6 +138,8 @@ Below is *a table of models we support* to run our leaderboard evaluation agains
 |gpt-4o-2024-05-13| Prompt|
 |gpt-4o-mini-2024-07-18-FC | Function Calling|
 |gpt-4o-mini-2024-07-18 | Prompt|
+|o1-mini-2024-09-12 | Prompt|
+|o1-preview-2024-09-12 | Prompt|
 |google/gemma-7b-it 💻| Prompt|
 |meetkai/functionary-medium-v3.1-FC| Function Calling|
 |meetkai/functionary-small-{v3.1,v3.2}-FC| Function Calling|
@@ -268,6 +270,8 @@ Some companies have proposed some optimization strategies in their models' handl
 
 ## Changelog
 
+* [Sept 13, 2024] [#638](https://github.com/ShishirPatil/gorilla/pull/638): Fix prompt formatting issue for `THUDM/glm-4-9b-chat`.
+* [Sept 12, 2024] [#635](https://github.com/ShishirPatil/gorilla/pull/635): Add new models `o1-preview-2024-09-12` and `o1-mini-2024-09-12` to the leaderboard.
 * [Sept 8, 2024] [#627](https://github.com/ShishirPatil/gorilla/pull/627) Add new model `MadeAgents/Hammer-7b` to the leaderboard.
 * [Sept 7, 2024] [#626](https://github.com/ShishirPatil/gorilla/pull/626): Fix prompt format for Llama models.
 * [Sept 4, 2024] [#623](https://github.com/ShishirPatil/gorilla/pull/623): Fix decoding issue in the `NvidiaHandler`; remove duplicate `ArcticHandler` class.
diff --git a/berkeley-function-call-leaderboard/apply_function_credential_config.py b/berkeley-function-call-leaderboard/apply_function_credential_config.py
index c5bb98f76..487fccd72 100644
--- a/berkeley-function-call-leaderboard/apply_function_credential_config.py
+++ b/berkeley-function-call-leaderboard/apply_function_credential_config.py
@@ -16,7 +16,7 @@
 PLACEHOLDERS = {}
 
 for var in ENV_VARS:
     if os.getenv(var) == "":
-        raise NoAPIKeyError(var)
+        raise NoAPIKeyError()
     PLACEHOLDERS[f"YOUR-{var.replace('_', '-')}"] = os.getenv(var)
 
diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/custom_exception.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/custom_exception.py
index 3504862d8..9daa51f7d 100644
--- a/berkeley-function-call-leaderboard/bfcl/eval_checker/custom_exception.py
+++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/custom_exception.py
@@ -1,6 +1,6 @@
 class NoAPIKeyError(Exception):
     def __init__(self):
-        self.message = "❗️Please fill in the API keys in the function_credential_config.json file. If you do not provide the API keys, the executable test category results will be inaccurate."
+        self.message = "❗️Please fill in the API keys in the .env file. If you do not provide the API keys, the executable test category results will be inaccurate."
         super().__init__(self.message)
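The hunks above replace the old `function_credential_config.json` flow with environment variables: an empty key now raises `NoAPIKeyError` with no arguments, and the error message points users at a `.env` file. A minimal sketch of the resulting check, assuming the four key names from the JSON file deleted at the bottom of this diff, and assuming `python-dotenv` loads the `.env` (the `load_dotenv()` call is an assumption, not part of this PR):

```python
# Sketch only, not repo code: the .env-based credential check this PR moves to.
import os

from dotenv import load_dotenv  # assumes python-dotenv; not shown in this diff

from bfcl.eval_checker.custom_exception import NoAPIKeyError

# Key names from the deleted function_credential_config.json; the env names use
# underscores where the JSON keys used dashes, matching the var.replace('_', '-')
# mapping in apply_function_credential_config.py above.
ENV_VARS = ["RAPID_API_KEY", "EXCHANGERATE_API_KEY", "OMDB_API_KEY", "GEOCODE_API_KEY"]

load_dotenv()  # read key=value pairs from a local .env file

for var in ENV_VARS:
    if not os.getenv(var):  # unset or empty key
        raise NoAPIKeyError()
```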
diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
index c2715393a..e5b7f9fab 100644
--- a/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
+++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
@@ -89,6 +89,18 @@
 ]
 
 MODEL_METADATA_MAPPING = {
+    "o1-preview-2024-09-12": [
+        "o1-preview-2024-09-12 (Prompt)",
+        "https://openai.com/index/introducing-openai-o1-preview/",
+        "OpenAI",
+        "Proprietary",
+    ],
+    "o1-mini-2024-09-12": [
+        "o1-mini-2024-09-12 (Prompt)",
+        "https://openai.com/index/openai-o1-mini-advancing-cost-efficient-reasoning/",
+        "OpenAI",
+        "Proprietary",
+    ],
     "gpt-4o-2024-08-06": [
         "GPT-4o-2024-08-06 (Prompt)",
         "https://openai.com/index/hello-gpt-4o/",
@@ -597,6 +609,8 @@
     "mistral-small-2402-FC-Auto": 1,
     "mistral-small-2402": 1,
     "mistral-tiny-2312": 0.25,
+    "o1-preview-2024-09-12": 15,
+    "o1-mini-2024-09-12": 3,
     "gpt-4o-2024-05-13-FC": 5,
     "gpt-4o-2024-05-13": 5,
     "gpt-4o-2024-08-06-FC": 2.5,
@@ -651,6 +665,8 @@
     "mistral-small-2402-FC-Any": 3,
     "mistral-small-2402-FC-Auto": 3,
     "mistral-tiny-2312": 0.25,
+    "o1-preview-2024-09-12": 60,
+    "o1-mini-2024-09-12": 12,
     "gpt-4o-2024-05-13-FC": 15,
     "gpt-4o-2024-05-13": 15,
     "gpt-4o-2024-08-06-FC": 10,
diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/executable_python_function.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/executable_python_function.py
index 20c1915e0..48f523cf0 100644
--- a/berkeley-function-call-leaderboard/bfcl/eval_checker/executable_python_function.py
+++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/executable_python_function.py
@@ -9,7 +9,7 @@
 api_key = {}
 
 for var in ENV_VARS:
     if os.getenv(var) == "":
-        raise NoAPIKeyError(var)
+        raise NoAPIKeyError()
     api_key[var.replace("_", "-")] = os.getenv(var)
 
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
index 5562dfe42..bfb9ddbdb 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
@@ -25,6 +25,8 @@
 handler_map = {
     "gorilla-openfunctions-v0": GorillaHandler,
     "gorilla-openfunctions-v2": GorillaHandler,
+    "o1-preview-2024-09-12": OpenAIHandler,
+    "o1-mini-2024-09-12": OpenAIHandler,
     "gpt-4o-2024-08-06": OpenAIHandler,
     "gpt-4o-2024-08-06-FC": OpenAIHandler,
     "gpt-4o-2024-05-13": OpenAIHandler,
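For context on the two price hunks above: the first table holds input prices and the second output prices, both in USD per million tokens, so o1-preview costs $15/M input and $60/M output, and o1-mini $3/M and $12/M. A hedged sketch of how such tables yield a run cost; the dict names are assumed to match those in `eval_runner_helper.py`, and `estimate_cost` itself is illustrative, not repo code:

```python
# Illustrative cost helper. Price values are copied from the hunks above;
# the dict names are assumed to match eval_runner_helper.py.
INPUT_PRICE_PER_MILLION_TOKEN = {"o1-preview-2024-09-12": 15, "o1-mini-2024-09-12": 3}
OUTPUT_PRICE_PER_MILLION_TOKEN = {"o1-preview-2024-09-12": 60, "o1-mini-2024-09-12": 12}


def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """Estimated USD cost of one run: tokens times per-million-token price."""
    return (
        input_tokens * INPUT_PRICE_PER_MILLION_TOKEN[model]
        + output_tokens * OUTPUT_PRICE_PER_MILLION_TOKEN[model]
    ) / 1_000_000


# Example: 1M input + 200k output tokens on o1-preview -> 15 + 12 = $27.00
print(f"${estimate_cost('o1-preview-2024-09-12', 1_000_000, 200_000):.2f}")
```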
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py
index 9ea72dee3..8edfdb8a9 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/glm.py
@@ -1,25 +1,31 @@
 from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
 from bfcl.model_handler.utils import convert_to_function_call
+from bfcl.model_handler.utils import convert_to_tool
+from bfcl.model_handler.constant import GORILLA_TO_OPENAPI
+from bfcl.model_handler.model_style import ModelStyle
 
 import json
 
-
 class GLMHandler(OSSHandler):
     def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
-        self.max_model_len = 4096
+        self.max_model_len = 8704
         self.stop_token_ids = [151329, 151336, 151338]
 
-    def apply_chat_template(self, prompts, function, test_category):
-        return self.tokenizer.apply_chat_template(
-            prompts, tokenize=False, add_generation_prompt=True
-        )
+    def apply_chat_template(self, prompts, functions, test_category):
+        formatted_prompt = ""
+        oai_tool = convert_to_tool(functions, GORILLA_TO_OPENAPI, ModelStyle.OpenAI, test_category)
+        if oai_tool:
+            formatted_prompt = "[gMASK]<|system|>\n你是一个名为 ChatGLM 的人工智能助手。你是基于智谱AI训练的语言模型 GLM-4 模型开发的,你的任务是针对用户的问题和要求提供适当的答复和支持。\n\n# 可用工具"
+            for tool in oai_tool:
+                formatted_prompt += f"\n\n## {tool['function']['name']}\n\n{json.dumps(tool['function'], indent=4)}"
+            formatted_prompt += "\n在调用上述函数时,请使用 Json 格式表示调用的参数。"
 
-    def inference(self, test_question, num_gpus, gpu_memory_utilization):
-        from transformers import AutoTokenizer
+        for prompt in prompts:
+            formatted_prompt += f"<|{prompt['role']}|>\n{prompt['content']}"
+        formatted_prompt += "<|assistant|>"
+        return formatted_prompt
 
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            self.model_name, trust_remote_code=True
-        )
+    def inference(self, test_question, num_gpus, gpu_memory_utilization):
         return super().inference(
             test_question,
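The rewritten `apply_chat_template` stops delegating to `tokenizer.apply_chat_template` and assembles GLM-4's chat format by hand. The Chinese system preamble translates roughly to "You are an AI assistant named ChatGLM, developed on the GLM-4 model trained by Zhipu AI; your task is to provide appropriate answers and support for the user's questions and requests", followed by a "# Available tools" section; the closing Chinese line asks the model to express call arguments as JSON. A standalone sketch of the string this produces, with a made-up tool already in OpenAI format so it runs without the `bfcl` package:

```python
# Standalone sketch mirroring the new GLMHandler.apply_chat_template logic.
# The tool and user turn are made up; the system preamble is abbreviated.
import json

SYSTEM_PREAMBLE = "[gMASK]<|system|>\n你是一个名为 ChatGLM 的人工智能助手。[...]\n\n# 可用工具"

oai_tool = [{
    "function": {
        "name": "get_weather",
        "description": "Look up the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    }
}]
prompts = [{"role": "user", "content": "What is the weather in Berkeley?"}]

formatted_prompt = SYSTEM_PREAMBLE
for tool in oai_tool:
    formatted_prompt += f"\n\n## {tool['function']['name']}\n\n{json.dumps(tool['function'], indent=4)}"
formatted_prompt += "\n在调用上述函数时,请使用 Json 格式表示调用的参数。"  # "use JSON for the call arguments"
for prompt in prompts:
    formatted_prompt += f"<|{prompt['role']}|>\n{prompt['content']}"
formatted_prompt += "<|assistant|>"
print(formatted_prompt)
```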
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/proprietary_model/openai.py b/berkeley-function-call-leaderboard/bfcl/model_handler/proprietary_model/openai.py
index 5b7de41c2..8562faf09 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/proprietary_model/openai.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/proprietary_model/openai.py
@@ -6,6 +6,8 @@
     system_prompt_pre_processing_chat_model,
     func_doc_language_specific_pre_processing,
     ast_parse,
+    convert_system_prompt_into_user_prompt,
+    combine_consecutive_user_prompr,
 )
 from bfcl.model_handler.constant import (
     GORILLA_TO_OPENAPI,
@@ -31,16 +33,31 @@ def inference(self, prompt, functions, test_category):
         prompt = system_prompt_pre_processing_chat_model(
             prompt, DEFAULT_SYSTEM_PROMPT, functions
         )
+        # Special handling for o1-preview and o1-mini as they don't support system prompts yet
+        if "o1-preview" in self.model_name or "o1-mini" in self.model_name:
+            prompt = convert_system_prompt_into_user_prompt(prompt)
+            prompt = combine_consecutive_user_prompr(prompt)
 
         message = prompt
         start_time = time.time()
-        response = self.client.chat.completions.create(
-            messages=message,
-            model=self.model_name,
-            temperature=self.temperature,
-            max_tokens=self.max_tokens,
-            top_p=self.top_p,
-        )
+        # These two models have temperature and top_p fixed to 1, and max_tokens is not supported
+        # Beta limitation: https://platform.openai.com/docs/guides/reasoning/beta-limitations
+        if "o1-preview" in self.model_name or "o1-mini" in self.model_name:
+            response = self.client.chat.completions.create(
+                messages=message,
+                model=self.model_name,
+                temperature=1,
+                # max_tokens=self.max_tokens,
+                top_p=1,
+            )
+        else:
+            response = self.client.chat.completions.create(
+                messages=message,
+                model=self.model_name,
+                temperature=self.temperature,
+                max_tokens=self.max_tokens,
+                top_p=self.top_p,
+            )
         latency = time.time() - start_time
         result = response.choices[0].message.content
         metadata = {}
diff --git a/berkeley-function-call-leaderboard/function_credential_config.json b/berkeley-function-call-leaderboard/function_credential_config.json
deleted file mode 100644
index 9d36e9bbd..000000000
--- a/berkeley-function-call-leaderboard/function_credential_config.json
+++ /dev/null
@@ -1 +0,0 @@
-[{"RAPID-API-KEY" : ""},{"EXCHANGERATE-API-KEY" : ""},{"OMDB-API-KEY" : ""}, {"GEOCODE-API-KEY": ""}]
\ No newline at end of file
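The two helpers imported into `openai.py` above are not defined in this diff (note that the import keeps the repo's own spelling, `combine_consecutive_user_prompr`). Below is an illustrative version of the behavior they exist for, assuming OpenAI-style message dicts; these are stand-ins, not the implementations in `bfcl.model_handler.utils`:

```python
# Stand-in sketches for the two message transforms applied to o1-preview/o1-mini.
def convert_system_prompt_into_user_prompt(messages):
    # o1 models reject the "system" role (a documented beta limitation),
    # so downgrade any system message to a user message.
    return [{**m, "role": "user"} if m["role"] == "system" else m for m in messages]


def combine_consecutive_user_prompts(messages):
    # The downgrade can leave two user turns back to back; merge adjacent
    # user messages so the conversation keeps one user message per turn.
    combined = []
    for m in messages:
        if combined and m["role"] == "user" and combined[-1]["role"] == "user":
            combined[-1]["content"] += "\n\n" + m["content"]
        else:
            combined.append(dict(m))
    return combined


# Example: a system prompt plus a user question collapses into one user message.
messages = [
    {"role": "system", "content": "You are a function-calling assistant."},
    {"role": "user", "content": "What is the weather in Berkeley?"},
]
print(combine_consecutive_user_prompts(convert_system_prompt_into_user_prompt(messages)))
```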