From 23ba990a2667600fe0f545fbda2e5ac13008e496 Mon Sep 17 00:00:00 2001 From: pyx9913 Date: Wed, 23 Oct 2024 20:44:55 +0800 Subject: [PATCH 1/8] Add minicpm3 4b FC model handler --- .../bfcl/eval_checker/model_metadata.py | 8 +- .../bfcl/model_handler/constant.py | 1 + .../bfcl/model_handler/handler_map.py | 2 + .../bfcl/model_handler/oss_model/minicpm.py | 5 +- .../model_handler/oss_model/minicpm_fc.py | 427 ++++++++++++++++++ .../pyproject.toml | 1 + 6 files changed, 442 insertions(+), 2 deletions(-) create mode 100644 berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py index 0081d312c..a7fda2ea5 100644 --- a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py +++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py @@ -644,11 +644,17 @@ "Apache-2.0", ], "openbmb/MiniCPM3-4B": [ - "MiniCPM3-4B (FC)", + "MiniCPM3-4B (Prompt)", "https://huggingface.co/openbmb/MiniCPM3-4B", "openbmb", "Apache-2.0", ], + "openbmb/MiniCPM3-4B-FC": [ + "MiniCPM3-4B-FC (FC)", + "https://huggingface.co/openbmb/MiniCPM3-4B-FC", + "openbmb", + "Apache-2.0", + ], } INPUT_PRICE_PER_MILLION_TOKEN = { diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/constant.py b/berkeley-function-call-leaderboard/bfcl/model_handler/constant.py index 5b25b8eac..1bd6df267 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/constant.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/constant.py @@ -146,4 +146,5 @@ "THUDM/glm-4-9b-chat", "ibm-granite/granite-20b-functioncalling", "yi-large-fc", + "openbmb/MiniCPM3-4B-FC", ] diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py index eae669af0..713795f72 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py @@ -11,6 +11,7 @@ from bfcl.model_handler.oss_model.salesforce import SalesforceHandler from bfcl.model_handler.oss_model.qwen import QwenHandler from bfcl.model_handler.oss_model.minicpm import MiniCPMHandler +from bfcl.model_handler.oss_model.minicpm_fc import MiniCPMFCHandler from bfcl.model_handler.proprietary_model.claude import ClaudeHandler from bfcl.model_handler.proprietary_model.cohere import CohereHandler from bfcl.model_handler.proprietary_model.databricks import DatabricksHandler @@ -126,6 +127,7 @@ "Qwen/Qwen2.5-7B-Instruct": QwenHandler, "Team-ACE/ToolACE-8B": LlamaHandler, "openbmb/MiniCPM3-4B": MiniCPMHandler, + "openbmb/MiniCPM3-4B-FC": MiniCPMFCHandler, } # Deprecated/outdated models, no longer on the leaderboard diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py index 521492598..4a6913796 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm.py @@ -1,5 +1,6 @@ from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler + class MiniCPMHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) @@ -11,7 +12,9 @@ def _format_prompt(self, messages, function): formatted_prompt = "" for message in messages: - formatted_prompt 
+= f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n" + formatted_prompt += ( + f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n" + ) formatted_prompt += f"<|im_start|>assistant\n" diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py new file mode 100644 index 000000000..36fc585f1 --- /dev/null +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py @@ -0,0 +1,427 @@ +import ast +import json +import keyword +import logging +import traceback +from typing import Dict, List + +import datamodel_code_generator +from datamodel_code_generator import DataModelType +from datamodel_code_generator.model import get_data_model_types +from datamodel_code_generator.parser.jsonschema import JsonSchemaParser +from overrides import overrides + +from bfcl.eval_checker.ast_eval.ast_checker import convert_func_name +from bfcl.model_handler.constant import ( + GORILLA_TO_OPENAPI, +) +from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler +from bfcl.model_handler.utils import ( + convert_to_tool, + func_doc_language_specific_pre_processing, + resolve_ast_call, +) +from overrides import overrides + +logger = logging.getLogger("minicpm") + + +class MiniCPMFCHandler(OSSHandler): + def __init__(self, model_name, temperature) -> None: + super().__init__(model_name, temperature) + self.stop_token_ids = [2, 73440] + + @overrides + def _query_prompting(self, inference_data: dict): + # We use the OpenAI Completions API with vLLM + function: list[dict] = inference_data["function"] + message: list[dict] = inference_data["message"] + + formatted_prompt: str = self._format_prompt(message, function) + inference_data["inference_input_log"] = {"formatted_prompt": formatted_prompt} + + if hasattr(self, "stop_token_ids"): + api_response = self.client.completions.create( + model=self.model_name_huggingface, + temperature=self.temperature, + prompt=formatted_prompt, + extra_body={ + "skip_special_tokens": False, + "stop_token_ids": self.stop_token_ids, + }, + max_tokens=512, # TODO: Is there a better way to handle this? 
+ ) + else: + api_response = self.client.completions.create( + model=self.model_name_huggingface, + temperature=self.temperature, + prompt=formatted_prompt, + extra_body={"skip_special_tokens": False}, + max_tokens=512, + ) + + return api_response + + @overrides + def _format_prompt(self, messages, function): + """ + "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" + """ + tools = convert_to_tool(function, GORILLA_TO_OPENAPI, self.model_style) + + formated_messages = minicpm_input_format( + messages=messages, tools=function, model_name=self.model_name + ) + formatted_prompt = "" + for message in formated_messages: + formatted_prompt += ( + f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>\n" + ) + + formatted_prompt += "<|im_start|>assistant\n" + return formatted_prompt + + @overrides + def _pre_query_processing_prompting(self, test_entry: dict) -> dict: + functions: list = test_entry["function"] + test_category: str = test_entry["id"].rsplit("_", 1)[0] + + functions = func_doc_language_specific_pre_processing(functions, test_category) + + # Hermes use its own system prompt + + return {"message": [], "function": functions} + + @overrides + def _add_execution_results_prompting( + self, + inference_data: dict, + execution_results: list[str], + model_response_data: dict, + ) -> dict: + for execution_result, decoded_model_response in zip( + execution_results, model_response_data["model_responses_decoded"] + ): + inference_data["message"].append( + {"role": "tool", "content": execution_result} + ) + + return inference_data + + def decode_ast(self, result, language="Python"): + msg = fc2dict(result) + if ( + "tool_calls" in msg + and msg["tool_calls"] is not None + and len(msg["tool_calls"]) > 0 + ): + return [ + {tool_call["name"]: tool_call["arguments"]} + for tool_call in msg["tool_calls"] + ] + else: + return msg["content"] + + def decode_execute(self, result): + msg = fc2dict(result) + if ( + "tool_calls" in msg + and msg["tool_calls"] is not None + and len(msg["tool_calls"]) > 0 + ): + execution_list = [] + for tool_call in msg["tool_calls"]: + func_name = tool_call["name"] + args_str = ", ".join( + f"{k}={repr(v)}" for k, v in tool_call["arguments"].items() + ) + execution_list.append(f"{func_name}({args_str})") + return execution_list + else: + return msg["content"] + + +def message_format(msg, system_suffix="", user_prefix=""): + if "thought" in msg and msg["thought"] is not None and len(msg["thought"]) > 0: + thought_prefix = f"<|thought_start|>\n{msg['thought']}\n<|thought_end|>\n" + else: + thought_prefix = "" + if msg["role"] == "assistant": + content = msg.get("content", "") + if content is None: + content = "" + if ( + "tool_calls" in msg + and msg["tool_calls"] is not None + and len(msg["tool_calls"]) > 0 + ): + + def add_quotes(variable): + if isinstance(variable, str): + return repr(variable) + else: + return str(variable) + + tool_calls = [] + for tool_call in msg["tool_calls"]: + if tool_call is None: + continue + tool_name = tool_call["name"] + if "arguments" not in tool_call or tool_call["arguments"] is None: + continue + if isinstance(tool_call["arguments"], str): + try: + tool_call["arguments"] = json.loads(tool_call["arguments"]) + except: + continue + args = ",".join( + [k + "=" + add_quotes(v) for k, v in tool_call["arguments"].items()] + ) + tool_calls.append(f"{tool_name}({args})") + + 
content = ( + thought_prefix + + "<|tool_call_start|>\n```python\n" + + "\n".join(tool_calls).strip() + + "\n```\n<|tool_call_end|>\n" + + content + ) + # msg["tool_call_string"] = "\n".join(tool_calls).strip() + msg["content"] = content + else: + content = thought_prefix + content + msg["content"] = content + elif msg["role"] == "user": + msg["content"] = user_prefix + "\n" + msg["content"] + elif msg["role"] == "system": + msg["content"] = msg["content"] + "\n" + system_suffix + msg["content"] = msg["content"].strip() + return msg + + +def jsonschema_to_code(jsonschema: dict) -> str: + input_text = json.dumps(jsonschema) + if datamodel_code_generator.get_version() < "0.26.2": + from datamodel_code_generator.format import PythonVersion + + data_model_types = get_data_model_types( + DataModelType.PydanticBaseModel, + target_python_version=PythonVersion.PY_310, + ) + else: + from datamodel_code_generator.format import DatetimeClassType, PythonVersion + + data_model_types = get_data_model_types( + DataModelType.PydanticBaseModel, + target_python_version=PythonVersion.PY_310, + target_datetime_class=DatetimeClassType.Datetime, + ) + parser = JsonSchemaParser( + source=input_text, + data_model_type=data_model_types.data_model, + data_model_root_type=data_model_types.root_model, + data_model_field_type=data_model_types.field_model, + data_type_manager_type=data_model_types.data_type_manager, + target_python_version=PythonVersion.PY_311, + dump_resolve_reference_action=data_model_types.dump_resolve_reference_action, + field_constraints=True, + ) + results = parser.parse() + return results + + +def transform_function(function: dict): + """turn json format of function into signature""" + params, default_params = [], [] + for prop_name, prop in function["parameters"]["properties"].items(): + if "default" in prop: + default_params.append(f'{prop_name}={repr(prop["default"])}') + elif prop_name not in function["parameters"].get("required", []): + default_params.append(f"{prop_name}={repr(None)}") + else: + params.append(prop_name) + ps = ", ".join(params + default_params) + res = "def {f_name}({ps}):\n".format(f_name=function["name"], ps=ps) + f_des = function.get("description", "") + content = jsonschema_to_code(function["parameters"]) + if "class" in content: + i = content.index("class") + # print(content[:i]) + content = content[i:] + classes, args = content.split("class Model(BaseModel):", 1) + lint_msg = f' """\n {f_des}\n Args:\n{args}\n """\n' + res += lint_msg + if len(classes) > 0: + res = classes + res + return res + + +def rename_tool(tool, model_name): + properties = {} + tool["name"] = convert_func_name(tool["name"], model_name) + for key, value in tool["parameters"]["properties"].items(): + if key in keyword.kwlist: + properties["_" + key] = value + else: + properties[key] = value + tool["parameters"]["properties"] = properties + return tool + + +def minicpm_input_format( + messages: List[Dict], + tools: List[Dict], + add_to_system=True, + model_name="openbmb/MiniCPM3-4B", +): + """ + Process the input messages, global_arguments, tools, tool_choice, + and convert it into a input string. + The global arguments and tools can not be both empty. 
+ parameters: + messages: List[Dict] + the input messages + For example: + tools: List[Dict] + the tools list you can use + For example: + """ + if tools is not None and len(tools) > 0: + header = "from enum import Enum\nfrom typing import List, Dict, Optional\nfrom pydantic import BaseModel, Field\n\n" + tools_string = header + for tool in tools: + tool = rename_tool(tool, model_name) + try: + tools_string += "\n\n" + transform_function(tool) + except: + print(traceback.format_exc()) + tools_template = """# Functions +Here is a list of functions that you can invoke: +```python +{tools} +``` + +# Function Call Rule and Output Format +- If the user's question can be answered without calling any function, please answer the user's question directly. In this situation, you should return your thought and answer the user's question directly. +- If the user cannot be answered without calling any function, and the user does not provide enough information to call functions, please ask the user for more information. In this situation, you should return your thought and ask the user for more information. +- If the user's question cannot be answered without calling any function, and the user has provided enough information to call functions to solve it, you should call the functions. In this situation, the assistant should return your thought and call the functions. +- Use default parameters unless the user has specified otherwise. +- You should answer in the following format: + +<|thought_start|> +{{explain why the user's question can be answered without calling a function or why you should ask the user for more information or why you should call one or more functions and your plan to solve the user's question.}} +<|thought_end|> +<|tool_call_start|> +```python +func1(params_name=params_value, params_name2=params_value2...) 
+func2(params) +``` +<|tool_call_end|> +{{answer the user's question directly or ask the user for more information}} +""" + tools_string = tools_template.format(tools=tools_string).strip() + else: + tools_string = "" + + if add_to_system: + if len(messages) > 0 and messages[0]["role"] != "system": + messages.insert(0, {"role": "system", "content": ""}) + return [ + message_format(msg, system_suffix=tools_string, user_prefix="") + for msg in messages + ] + else: + return [ + message_format(msg, system_suffix="", user_prefix=tools_string) + for msg in messages + ] + + +def convert_function_call_to_json(string): + # print('converting', string) + try: + tool_calls = [] + x = ast.parse(string) + for tool in x.body: + function_name = tool.value.func.id + function_args = {} + for kw in tool.value.keywords: + function_args[kw.arg] = ast.literal_eval(kw.value) + this_one = {"name": function_name, "arguments": function_args} + # print('converted to', this_one) + tool_calls.append(this_one) + return tool_calls + except Exception: + return [] + + +def fc2dict( + sequence: str, + tool_call_start="<|tool_call_start|>", + tool_call_end="<|tool_call_end|>", + thought_start="<|thought_start|>", + thought_end="<|thought_end|>", +): + if thought_end in sequence and thought_start in sequence: + thought_string, sequence = sequence.rsplit(thought_end, 1) + thought_string = thought_string.split(thought_start, 1)[1] + else: + thought_string = "" + if tool_call_start in sequence and tool_call_end in sequence: + tool_call_string, content = sequence.rsplit(tool_call_end, 1) + tool_call_string = tool_call_string.split(tool_call_start, 1)[1] + try: + tool_calls = [] + tool_call_string = tool_call_string.strip() + if tool_call_string.startswith("```"): + tool_call_string = tool_call_string.lstrip("```").strip() + if tool_call_string.startswith("python"): + tool_call_string = tool_call_string.lstrip("python").strip() + if tool_call_string.endswith("```"): + tool_call_string = tool_call_string.rstrip("```").strip() + for kw in keyword.kwlist: + tool_call_string = tool_call_string.replace( + "," + kw + "=", "," + kw + "_=" + ) + tool_call_string = tool_call_string.replace( + " " + kw + "=", " " + kw + "_=" + ) + tool_call_string = tool_call_string.replace( + "(" + kw + "=", "(" + kw + "_=" + ) + + parsed = ast.parse(tool_call_string) + + for elem in parsed.body: + assert isinstance(elem.value, ast.Call) + calls = resolve_ast_call(elem.value) + + for func_name, func_args in calls.items(): + new_args = {} + for k, v in func_args.items(): + for kw in keyword.kwlist: + if k == kw + "_": + k = kw + new_args[k] = v + + this_one = {"name": func_name, "arguments": new_args} + tool_calls.append(this_one) + + return { + "content": content.strip(), + "tool_calls": tool_calls, + "role": "assistant", + } + except: + logger.error(traceback.format_exc()) + return { + "content": content.strip(), + "role": "assistant", + "thought": thought_string, + } + else: + return { + "content": sequence.strip(), + "role": "assistant", + "thought": thought_string, + } diff --git a/berkeley-function-call-leaderboard/pyproject.toml b/berkeley-function-call-leaderboard/pyproject.toml index eb433f2af..58f4fa7be 100644 --- a/berkeley-function-call-leaderboard/pyproject.toml +++ b/berkeley-function-call-leaderboard/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "typer>=0.12.5", "tabulate>=0.9.0", "google-cloud-aiplatform>=1.70.0", + "datamodel-code-generator==0.25.7", ] [project.scripts] From de1239196f3a7ff3fceaac92b7a74b31d4a5f61d Mon Sep 17 00:00:00 
2001 From: pyx9913 Date: Mon, 11 Nov 2024 10:50:38 +0800 Subject: [PATCH 2/8] fix model url --- .../bfcl/eval_checker/model_metadata.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py index a7fda2ea5..8a0c00be4 100644 --- a/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py +++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/model_metadata.py @@ -651,7 +651,7 @@ ], "openbmb/MiniCPM3-4B-FC": [ "MiniCPM3-4B-FC (FC)", - "https://huggingface.co/openbmb/MiniCPM3-4B-FC", + "https://huggingface.co/openbmb/MiniCPM3-4B", "openbmb", "Apache-2.0", ], @@ -701,10 +701,10 @@ "gemini-1.5-pro-002-FC": 1.25, "gemini-1.5-pro-001": 1.25, "gemini-1.5-pro-001-FC": 1.25, - "gemini-1.5-flash-002": 0.075 , - "gemini-1.5-flash-002-FC": 0.075 , - "gemini-1.5-flash-001": 0.075 , - "gemini-1.5-flash-001-FC": 0.075 , + "gemini-1.5-flash-002": 0.075, + "gemini-1.5-flash-002-FC": 0.075, + "gemini-1.5-flash-001": 0.075, + "gemini-1.5-flash-001-FC": 0.075, "gemini-1.0-pro-002": 0.5, "gemini-1.0-pro-002-FC": 0.5, "databricks-dbrx-instruct": 2.25, @@ -776,10 +776,9 @@ # The latency of the open-source models are hardcoded here. # Because we do batching when generating the data, so the latency is not accurate from the result data. # This is the latency for the whole batch of data, when using 8 V100 GPUs. -OSS_LATENCY = { -} +OSS_LATENCY = {} -# All OSS models will have no cost shown on the leaderboard. +# All OSS models will have no cost shown on the leaderboard. NO_COST_MODELS = list(local_inference_handler_map.keys()) # The following models will also have no cost, even though they are queries through the API. 
NO_COST_MODELS += [ @@ -797,7 +796,7 @@ "Salesforce/xLAM-7b-r", "Salesforce/xLAM-8x7b-r", "Salesforce/xLAM-8x22b-r", - "Team-ACE/ToolACE-8B", + "Team-ACE/ToolACE-8B", "MadeAgents/Hammer2.0-7b", "MadeAgents/Hammer2.0-3b", "MadeAgents/Hammer2.0-1.5b", From 522b31ff0d082c70780f6e7f9cc72d3904df7bb1 Mon Sep 17 00:00:00 2001 From: pyx9913 Date: Mon, 11 Nov 2024 20:40:10 +0800 Subject: [PATCH 3/8] fix decode_execute when there is not function call --- .../bfcl/model_handler/oss_model/minicpm_fc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py index 36fc585f1..956dd7325 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py @@ -138,7 +138,7 @@ def decode_execute(self, result): execution_list.append(f"{func_name}({args_str})") return execution_list else: - return msg["content"] + return [] def message_format(msg, system_suffix="", user_prefix=""): From 5bfd400c9d119674ca6a5025482c13d487b8dbed Mon Sep 17 00:00:00 2001 From: "Huanzhi (Hans) Mao" Date: Mon, 25 Nov 2024 20:54:08 -0800 Subject: [PATCH 4/8] generalize _query_prompting --- .../oss_model/base_oss_handler.py | 8 +++- .../model_handler/oss_model/minicpm_fc.py | 48 +++---------------- 2 files changed, 14 insertions(+), 42 deletions(-) diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py index e6e93da57..010be1edd 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/base_oss_handler.py @@ -273,13 +273,19 @@ def _query_prompting(self, inference_data: dict): else: leftover_tokens_count = min(4096, self.max_context_length - input_token_count - 2) + extra_body = {} if hasattr(self, "stop_token_ids"): + extra_body["stop_token_ids"] = self.stop_token_ids + if hasattr(self, "skip_special_tokens"): + extra_body["skip_special_tokens"] = self.skip_special_tokens + + if len(extra_body) > 0: api_response = self.client.completions.create( model=self.model_name_huggingface, temperature=self.temperature, prompt=formatted_prompt, max_tokens=leftover_tokens_count, - extra_body={"stop_token_ids": self.stop_token_ids}, + extra_body=extra_body, ) else: api_response = self.client.completions.create( diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py index 956dd7325..6643cc604 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py @@ -6,21 +6,17 @@ from typing import Dict, List import datamodel_code_generator -from datamodel_code_generator import DataModelType -from datamodel_code_generator.model import get_data_model_types -from datamodel_code_generator.parser.jsonschema import JsonSchemaParser -from overrides import overrides - from bfcl.eval_checker.ast_eval.ast_checker import convert_func_name -from bfcl.model_handler.constant import ( - GORILLA_TO_OPENAPI, -) +from bfcl.model_handler.constant import GORILLA_TO_OPENAPI from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler from 
bfcl.model_handler.utils import ( convert_to_tool, func_doc_language_specific_pre_processing, resolve_ast_call, ) +from datamodel_code_generator import DataModelType +from datamodel_code_generator.model import get_data_model_types +from datamodel_code_generator.parser.jsonschema import JsonSchemaParser from overrides import overrides logger = logging.getLogger("minicpm") @@ -30,42 +26,12 @@ class MiniCPMFCHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) self.stop_token_ids = [2, 73440] - - @overrides - def _query_prompting(self, inference_data: dict): - # We use the OpenAI Completions API with vLLM - function: list[dict] = inference_data["function"] - message: list[dict] = inference_data["message"] - - formatted_prompt: str = self._format_prompt(message, function) - inference_data["inference_input_log"] = {"formatted_prompt": formatted_prompt} - - if hasattr(self, "stop_token_ids"): - api_response = self.client.completions.create( - model=self.model_name_huggingface, - temperature=self.temperature, - prompt=formatted_prompt, - extra_body={ - "skip_special_tokens": False, - "stop_token_ids": self.stop_token_ids, - }, - max_tokens=512, # TODO: Is there a better way to handle this? - ) - else: - api_response = self.client.completions.create( - model=self.model_name_huggingface, - temperature=self.temperature, - prompt=formatted_prompt, - extra_body={"skip_special_tokens": False}, - max_tokens=512, - ) - - return api_response + self.skip_special_tokens = False @overrides def _format_prompt(self, messages, function): """ - "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" + "chat_template": "{%- macro json_to_python_type(param_name, json_spec) %}\n{%- set basic_type_map = {\n 'string': 'str',\n 'number': 'float',\n 'integer': 'int',\n 'boolean': 'bool',\n 'null': 'None'\n} %}\n\n{%- if json_spec.enum %}\n {{- param_name|title }}\n{%- elif basic_type_map[json_spec.type] is defined %}\n {{- basic_type_map[json_spec.type] }}\n{%- elif json_spec.type == 'array' %}\n {{- 'List[' + json_to_python_type(param_name, json_spec['items']) + ']' }}\n{%- elif json_spec.type == 'object' %}\n {{- 'Dict[str, ' + json_to_python_type(param_name, json_spec.additionalProperties if json_spec.additionalProperties else 'Any') + ']' if not json_spec.properties else param_name|title }}\n{%- elif json_spec.type is iterable %}\n {{- 'Union[' }}\n {%- for t in json_spec.type %}\n {{- json_to_python_type(param_name, {'type': t}) }}\n {{- ', ' if not loop.last }}\n {%- endfor %}\n {{- ']' }}\n{%- else %}\n {{- 'Any' }}\n{%- endif %}\n{%- endmacro %}\n\n{%- macro object_to_fields(json_spec, field_indent) %}\n {%- set o_ns = namespace(f = caller()) %}\n {%- for param_name, param_fields in json_spec.properties|items %}\n {%- if param_fields.enum %}\n {{- '\\n\\nclass ' + param_name|title + '(Enum):\\n' }}\n {%- for enum_option in param_fields.enum %}\n {{- ' enum_' + loop.index0|string + ' = ' + enum_option|tojson + '\\n' }}\n {%- endfor %}\n {%- elif param_fields.type == 'object' and param_fields.properties %}\n {%- call object_to_fields(param_fields, ' ') %}\n {{- '\\n\\nclass ' + param_name|title + '(BaseModel):\\n' }}\n {%- endcall %}\n {%- elif param_fields.type == 'array' and param_fields['items'] and param_fields['items'].type == 'object' and param_fields['items'].properties %}\n {%- call 
object_to_fields(param_fields['items'], ' ') %}\n {{- '\\n\\nclass ' + param_name|title + '(BaseModel):\\n' }}\n {%- endcall %}\n {%- endif %}\n {%- set param_default = param_fields.default|tojson if param_fields.default is string else param_fields.default|string if param_fields.default is defined else 'None' %}\n {%- set o_ns.f = o_ns.f + field_indent + param_name + ': ' %}\n {%- set o_ns.f = o_ns.f + ('Optional[' + json_to_python_type(param_name, param_fields) + ']' if param_name not in json_spec.required else json_to_python_type(param_name, param_fields)) %}\n {%- if not param_fields.title and not param_fields.description and not param_fields.pattern %}\n {%- set o_ns.f = o_ns.f + (' = ' + param_default if param_name not in json_spec.required else '') %}\n {%- else %}\n {%- set o_ns.f = o_ns.f + (' = Field(...' if param_name in json_spec.required else ' = Field(' + param_default) %}\n {%- set o_ns.f = o_ns.f + (', description=' + param_fields.description|tojson if param_fields.description else '') %}\n {%- set o_ns.f = o_ns.f + (', regex=' + param_fields.pattern|tojson if param_fields.pattern else '') %}\n {%- set o_ns.f = o_ns.f + (', title=' + param_fields.title|tojson if param_fields.title else '') %}\n {%- set o_ns.f = o_ns.f + ')' %}\n {%- endif %}\n {%- set o_ns.f = o_ns.f + '\\n' %}\n {%- endfor %}\n {{- o_ns.f }}\n{%- endmacro %}\n\n{%- macro tool_parser(tools) %}\n{%- for tool in tools %}\n {%- if tool.type is not defined or tool.type == 'function' %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {%- set tool_params = tool.parameters if tool.parameters is defined else none %}\n {%- call object_to_fields(tool_params, ' ') %}\n {{- '\\n\\ndef ' + tool.name + '(' }}\n {%- if tool_params %}\n {%- for param_name, param_fields in tool_params.properties|items %}\n {%- set param_default = param_fields.default|tojson if param_fields.default is string else param_fields.default|string if param_fields.default is defined else 'None' %}\n {{- ', ' if loop.index0 != 0 }}\n {{- param_name }}\n {{- '=' + param_default if param_name not in tool_params.required }}\n {%- endfor %}\n {%- endif %}\n {{- '):\\n \"\"\"' }}\n {{- tool.description }}\n {{- '\\n\\n Args:\\n' if tool_params else '\\n' }}\n {%- endcall %}\n {{- ' \"\"\"\\n' }}\n {%- endif %}\n{%- endfor %}\n{%- endmacro %}\n\n{%- if messages[0]['role'] == 'system' %}\n {%- set loop_messages = messages[1:] %}\n {%- set system_message = messages[0]['content'] %}\n{%- else %}\n {%- set loop_messages = messages %}\n {%- set system_message = '' %}\n{%- endif %}\n{{- '<|im_start|>system\\n' + system_message if system_message or tools }}\n{%- if tools %}\n {{- '\\n# Functions\\nHere is a list of functions that you can invoke:\\n```python\\nfrom enum import Enum\\nfrom typing import List, Dict, Optional\\nfrom pydantic import BaseModel, Field\\n\\n' }}\n {{- tool_parser(tools) }}\n {{- \"\\n```\\n\\n# Function Call Rule and Output Format\\n- If the user's question can be answered without calling any function, please answer the user's question directly. In this situation, you should return your thought and answer the user's question directly.\\n- If the user cannot be answered without calling any function, and the user does not provide enough information to call functions, please ask the user for more information. 
In this situation, you should return your thought and ask the user for more information.\\n- If the user's question cannot be answered without calling any function, and the user has provided enough information to call functions to solve it, you should call the functions. In this situation, the assistant should return your thought and call the functions.\\n- Use default parameters unless the user has specified otherwise.\\n- You should answer in the following format:\\n\\n<|thought_start|>\\n{explain why the user's question can be answered without calling a function or why you should ask the user for more information or why you should call one or more functions and your plan to solve the user's question.}\\n<|thought_end|>\\n<|tool_call_start|>\\n```python\\nfunc1(params_name=params_value, params_name2=params_value2...)\\nfunc2(params)\\n```\\n<|tool_call_end|>\\n{answer the user's question directly or ask the user for more information}\" }}\n{%- endif %}\n{{- '<|im_end|>\\n' if system_message or tools }}\n{%- for message in loop_messages %}\n {%- set content = message.content %}\n {%- if message.role == 'assistant' and message.tool_calls %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {{- '<|thought_start|>\\n' + message.thought + '\\n<|thought_end|>\\n' if message.thought }}\n {{- '<|tool_call_start|>\\n```python\\n' }}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- tool_call.name + '(' }}\n {%- if tool_call.arguments is defined and tool_call.arguments|length > 0 %}\n {%- for param_name, param_value in tool_call.arguments|items %}\n {{- param_name + '=' + param_value|tojson }}\n {{- ',' if not loop.last }}\n {%- endfor %}\n {%- endif %}\n {{- ')\\n' }}\n {%- endfor %}\n {{- '```\\n<|tool_call_end|>\\n' }}\n {{- content if content and not content.startswith('<|tool_call_start|>') }}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == 'assistant' and message.thought %}\n {{- '<|im_start|>' + message.role + '\\n' + '<|thought_start|>\\n' + message.thought + '\\n<|thought_end|>\\n' + content + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endfor %}\n\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}" """ tools = convert_to_tool(function, GORILLA_TO_OPENAPI, self.model_style) @@ -88,7 +54,7 @@ def _pre_query_processing_prompting(self, test_entry: dict) -> dict: functions = func_doc_language_specific_pre_processing(functions, test_category) - # Hermes use its own system prompt + # MiniCPM use its own system prompt in FC mode return {"message": [], "function": functions} From d4394714ef1bfb7bf9c37879e9435860d5622783 Mon Sep 17 00:00:00 2001 From: "Huanzhi (Hans) Mao" Date: Mon, 25 Nov 2024 21:39:21 -0800 Subject: [PATCH 5/8] fix: add self.model_name_huggingfac --- .../bfcl/model_handler/oss_model/minicpm_fc.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py index 6643cc604..6025a1ac1 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py @@ -27,6 +27,8 @@ def __init__(self, model_name, temperature) -> None: super().__init__(model_name, temperature) self.stop_token_ids = [2, 73440] 
self.skip_special_tokens = False + self.model_name_huggingface = model_name.replace("-FC", "") + @overrides def _format_prompt(self, messages, function): From 09cb5d77f48f6f7de766a477484a572af30a3ed9 Mon Sep 17 00:00:00 2001 From: "Huanzhi (Hans) Mao" Date: Mon, 25 Nov 2024 21:49:28 -0800 Subject: [PATCH 6/8] fix: add overrides to pyproject --- berkeley-function-call-leaderboard/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/berkeley-function-call-leaderboard/pyproject.toml b/berkeley-function-call-leaderboard/pyproject.toml index 1a7c306dc..d486118b4 100644 --- a/berkeley-function-call-leaderboard/pyproject.toml +++ b/berkeley-function-call-leaderboard/pyproject.toml @@ -31,7 +31,8 @@ dependencies = [ "datamodel-code-generator==0.25.7", "google-cloud-aiplatform==1.72.0", "mpmath==1.3.0", - "tenacity==9.0.0" + "tenacity==9.0.0", + "overrides" ] [project.scripts] From a15c9aeaf832647df06e5813e2d1ed6b7e3d247c Mon Sep 17 00:00:00 2001 From: "Huanzhi (Hans) Mao" Date: Mon, 25 Nov 2024 22:53:35 -0800 Subject: [PATCH 7/8] remove conversion for param name in keyword.kwlist; update change log --- .../CHANGELOG.md | 1 + berkeley-function-call-leaderboard/README.md | 3 +- .../model_handler/oss_model/minicpm_fc.py | 64 ++----------------- 3 files changed, 8 insertions(+), 60 deletions(-) diff --git a/berkeley-function-call-leaderboard/CHANGELOG.md b/berkeley-function-call-leaderboard/CHANGELOG.md index 0995ec2ff..e9df86b6e 100644 --- a/berkeley-function-call-leaderboard/CHANGELOG.md +++ b/berkeley-function-call-leaderboard/CHANGELOG.md @@ -2,6 +2,7 @@ All notable changes to the Berkeley Function Calling Leaderboard will be documented in this file. +- [Nov 25, 2024] [#718](https://github.com/ShishirPatil/gorilla/pull/718): Add new model `openbmb/MiniCPM3-4B-FC` to the leaderboard. - [Nov 25, 2024] [#697](https://github.com/ShishirPatil/gorilla/pull/697): Add the following new models to the leaderboard: - `deepseek-ai/DeepSeek-V2.5` - `deepseek-ai/DeepSeek-Coder-V2-Instruct-0724` diff --git a/berkeley-function-call-leaderboard/README.md b/berkeley-function-call-leaderboard/README.md index 5d54bd854..add6a514e 100644 --- a/berkeley-function-call-leaderboard/README.md +++ b/berkeley-function-call-leaderboard/README.md @@ -219,7 +219,8 @@ Below is _a table of models we support_ to run our leaderboard evaluation agains |Qwen/Qwen2.5-{1.5B,7B}-Instruct 💻| Prompt| |Qwen/Qwen2-{1.5B,7B}-Instruct 💻| Prompt| |Team-ACE/ToolACE-8B 💻| Function Calling| -|openbmb/MiniCPM3-4B 💻| Function Calling| +|openbmb/MiniCPM3-4B-FC 💻| Function Calling| +|openbmb/MiniCPM3-4B 💻| Prompt| |BitAgent/GoGoAgent 💻| Prompt| Here {MODEL} 💻 means the model needs to be hosted locally and called by vllm, {MODEL} means the models that are called API calls. For models with a trailing `-FC`, it means that the model supports function-calling feature. You can check out the table summarizing feature supports among different models [here](https://gorilla.cs.berkeley.edu/blogs/8_berkeley_function_calling_leaderboard.html#prompt). 
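Before the handler changes below: the following diff removes the Python-keyword workarounds (the `rename_tool` helper and the `keyword.kwlist` string rewriting inside `fc2dict`). As a rough illustration of the behavior being removed (the function and argument names here are made up, not taken from the benchmark), the old parser rewrote keyword-named parameters so that `ast.parse` would accept them:

import keyword

# Sketch of the removed workaround: "from=" is not valid Python syntax inside a call,
# so the old fc2dict rewrote keyword parameter names to "<kw>_=" before parsing
# (it handled "(", ",", and space-prefixed occurrences) and mapped the names back afterwards.
call = "book_flight(from='SFO', to='JFK')"
for kw in keyword.kwlist:
    call = call.replace("(" + kw + "=", "(" + kw + "_=")
print(call)  # book_flight(from_='SFO', to='JFK')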
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py index 6025a1ac1..e434155b0 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py @@ -1,12 +1,8 @@ import ast import json -import keyword -import logging -import traceback from typing import Dict, List import datamodel_code_generator -from bfcl.eval_checker.ast_eval.ast_checker import convert_func_name from bfcl.model_handler.constant import GORILLA_TO_OPENAPI from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler from bfcl.model_handler.utils import ( @@ -19,8 +15,6 @@ from datamodel_code_generator.parser.jsonschema import JsonSchemaParser from overrides import overrides -logger = logging.getLogger("minicpm") - class MiniCPMFCHandler(OSSHandler): def __init__(self, model_name, temperature) -> None: @@ -29,7 +23,6 @@ def __init__(self, model_name, temperature) -> None: self.skip_special_tokens = False self.model_name_huggingface = model_name.replace("-FC", "") - @overrides def _format_prompt(self, messages, function): """ @@ -61,21 +54,6 @@ def _pre_query_processing_prompting(self, test_entry: dict) -> dict: return {"message": [], "function": functions} @overrides - def _add_execution_results_prompting( - self, - inference_data: dict, - execution_results: list[str], - model_response_data: dict, - ) -> dict: - for execution_result, decoded_model_response in zip( - execution_results, model_response_data["model_responses_decoded"] - ): - inference_data["message"].append( - {"role": "tool", "content": execution_result} - ) - - return inference_data - def decode_ast(self, result, language="Python"): msg = fc2dict(result) if ( @@ -90,6 +68,7 @@ def decode_ast(self, result, language="Python"): else: return msg["content"] + @overrides def decode_execute(self, result): msg = fc2dict(result) if ( @@ -154,7 +133,6 @@ def add_quotes(variable): + "\n```\n<|tool_call_end|>\n" + content ) - # msg["tool_call_string"] = "\n".join(tool_calls).strip() msg["content"] = content else: content = thought_prefix + content @@ -214,7 +192,6 @@ def transform_function(function: dict): content = jsonschema_to_code(function["parameters"]) if "class" in content: i = content.index("class") - # print(content[:i]) content = content[i:] classes, args = content.split("class Model(BaseModel):", 1) lint_msg = f' """\n {f_des}\n Args:\n{args}\n """\n' @@ -224,18 +201,6 @@ def transform_function(function: dict): return res -def rename_tool(tool, model_name): - properties = {} - tool["name"] = convert_func_name(tool["name"], model_name) - for key, value in tool["parameters"]["properties"].items(): - if key in keyword.kwlist: - properties["_" + key] = value - else: - properties[key] = value - tool["parameters"]["properties"] = properties - return tool - - def minicpm_input_format( messages: List[Dict], tools: List[Dict], @@ -258,11 +223,11 @@ def minicpm_input_format( header = "from enum import Enum\nfrom typing import List, Dict, Optional\nfrom pydantic import BaseModel, Field\n\n" tools_string = header for tool in tools: - tool = rename_tool(tool, model_name) try: tools_string += "\n\n" + transform_function(tool) except: - print(traceback.format_exc()) + pass + # print(traceback.format_exc()) tools_template = """# Functions Here is a list of functions that you can invoke: ```python @@ -306,7 +271,6 @@ def minicpm_input_format( def 
convert_function_call_to_json(string): - # print('converting', string) try: tool_calls = [] x = ast.parse(string) @@ -316,7 +280,6 @@ def convert_function_call_to_json(string): for kw in tool.value.keywords: function_args[kw.arg] = ast.literal_eval(kw.value) this_one = {"name": function_name, "arguments": function_args} - # print('converted to', this_one) tool_calls.append(this_one) return tool_calls except Exception: @@ -347,16 +310,6 @@ def fc2dict( tool_call_string = tool_call_string.lstrip("python").strip() if tool_call_string.endswith("```"): tool_call_string = tool_call_string.rstrip("```").strip() - for kw in keyword.kwlist: - tool_call_string = tool_call_string.replace( - "," + kw + "=", "," + kw + "_=" - ) - tool_call_string = tool_call_string.replace( - " " + kw + "=", " " + kw + "_=" - ) - tool_call_string = tool_call_string.replace( - "(" + kw + "=", "(" + kw + "_=" - ) parsed = ast.parse(tool_call_string) @@ -365,14 +318,8 @@ def fc2dict( calls = resolve_ast_call(elem.value) for func_name, func_args in calls.items(): - new_args = {} - for k, v in func_args.items(): - for kw in keyword.kwlist: - if k == kw + "_": - k = kw - new_args[k] = v - - this_one = {"name": func_name, "arguments": new_args} + + this_one = {"name": func_name, "arguments": func_args} tool_calls.append(this_one) return { @@ -381,7 +328,6 @@ def fc2dict( "role": "assistant", } except: - logger.error(traceback.format_exc()) return { "content": content.strip(), "role": "assistant", From 53421890c901dd6d30f65ed4215d3327cf3cee22 Mon Sep 17 00:00:00 2001 From: "Huanzhi (Hans) Mao" Date: Mon, 25 Nov 2024 22:56:19 -0800 Subject: [PATCH 8/8] fix: properly underscore function name --- .../bfcl/model_handler/oss_model/minicpm_fc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py index e434155b0..c08027722 100644 --- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py +++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/minicpm_fc.py @@ -31,7 +31,7 @@ def _format_prompt(self, messages, function): tools = convert_to_tool(function, GORILLA_TO_OPENAPI, self.model_style) formated_messages = minicpm_input_format( - messages=messages, tools=function, model_name=self.model_name + messages=messages, tools=tools, model_name=self.model_name ) formatted_prompt = "" for message in formated_messages:
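Taken together, the final handler advertises functions as Python signatures in the system prompt and parses tool calls back out of the <|tool_call_start|> block. Below is a rough end-to-end sketch of that parsing path using a made-up completion and function name (illustrative only, not part of the patches); it assumes the imports at the top of minicpm_fc.py, in particular resolve_ast_call.

# Illustrative only: exercising fc2dict on a fabricated MiniCPM3-style completion.
sample = (
    "<|thought_start|>\n"
    "The user wants the current weather, so I should call get_weather.\n"
    "<|thought_end|>\n"
    "<|tool_call_start|>\n"
    "```python\n"
    "get_weather(city='Berkeley', unit='celsius')\n"
    "```\n"
    "<|tool_call_end|>\n"
    "Let me look that up."
)

msg = fc2dict(sample)
# Expected shape, assuming resolve_ast_call returns {func_name: {arg: value}}:
# {"content": "Let me look that up.",
#  "tool_calls": [{"name": "get_weather",
#                  "arguments": {"city": "Berkeley", "unit": "celsius"}}],
#  "role": "assistant"}
#
# decode_ast then reduces this to [{"get_weather": {"city": "Berkeley", "unit": "celsius"}}]
# for the AST checker, and decode_execute to ["get_weather(city='Berkeley', unit='celsius')"].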