diff --git a/berkeley-function-call-leaderboard/bfcl_eval/constants/default_prompts.py b/berkeley-function-call-leaderboard/bfcl_eval/constants/default_prompts.py
index ad49d469f3..daf339bbdd 100644
--- a/berkeley-function-call-leaderboard/bfcl_eval/constants/default_prompts.py
+++ b/berkeley-function-call-leaderboard/bfcl_eval/constants/default_prompts.py
@@ -42,14 +42,21 @@
 )
 
 _MARKDOWN_SYSTEM_PROMPT_TEMPLATE = "{persona}\n\n## Task\n{task}\n\n## Tool Call Format\n{tool_call_format}\n\n## Multi-turn Behavior\n{multiturn_behavior}\n\n## Available Tools\n{available_tools}"
 
+_CHAT_COMPLETIONS_SYSTEM_PROMPT_TEMPLATE = (
+    "{persona}{task}\n\n{tool_call_format}\n\n{multiturn_behavior}"
+)
+
 PROMPT_TEMPLATE_MAPPING = {
     "plaintext": _PLAINTEXT_SYSTEM_PROMPT_TEMPLATE,
     "markdown": _MARKDOWN_SYSTEM_PROMPT_TEMPLATE,
+    "chat_completions": _CHAT_COMPLETIONS_SYSTEM_PROMPT_TEMPLATE,
 }
 
 # This is the default system prompt format
 DEFAULT_SYSTEM_PROMPT_FORMAT = "ret_fmt=python&tool_call_tag=False&func_doc_fmt=json&prompt_fmt=plaintext&style=classic"
 
+CHAT_COMPLETIONS_SYSTEM_PROMPT_FORMAT = "ret_fmt=python&tool_call_tag=False&func_doc_fmt=json&prompt_fmt=chat_completions&style=classic"
+
 # NOT USED, just for reference
 # This is the prompt template for the default system prompt format
 _DEFAULT_SYSTEM_PROMPT = """You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
diff --git a/berkeley-function-call-leaderboard/bfcl_eval/model_handler/api_inference/openai_completion.py b/berkeley-function-call-leaderboard/bfcl_eval/model_handler/api_inference/openai_completion.py
index 357584f8db..f7d1991bae 100644
--- a/berkeley-function-call-leaderboard/bfcl_eval/model_handler/api_inference/openai_completion.py
+++ b/berkeley-function-call-leaderboard/bfcl_eval/model_handler/api_inference/openai_completion.py
@@ -86,14 +86,22 @@ def _query_FC(self, inference_data: dict):
             "model": self.model_name,
             "temperature": self.temperature,
             "store": False,
+            "max_completion_tokens": 8192,
         }
 
         if len(tools) > 0:
             kwargs["tools"] = tools
+            kwargs["tool_choice"] = "auto"
 
         return self.generate_with_backoff(**kwargs)
 
     def _pre_query_processing_FC(self, inference_data: dict, test_entry: dict) -> dict:
+
+        # SID_DEBUG
+        functions: list = test_entry["function"]
+        test_entry_id: str = "chat_completions"  # SID_DEBUG: verify this works for all test_entry IDs; check the dataset in BFCL.
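+        # SID_DEBUG: the hard-coded ID makes extract_prompt_format_from_id return CHAT_COMPLETIONS_SYSTEM_PROMPT_FORMAT for every entry.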
+ test_entry["question"][0] = system_prompt_pre_processing_chat_model(test_entry["question"][0], functions, test_entry_id) + # SID_DEBUG inference_data["message"] = [] return inference_data @@ -223,6 +231,7 @@ def _query_prompting(self, inference_data: dict): model=self.model_name, temperature=self.temperature, store=False, + max_tokens=8192, ) def _pre_query_processing_prompting(self, test_entry: dict) -> dict: diff --git a/berkeley-function-call-leaderboard/bfcl_eval/model_handler/utils.py b/berkeley-function-call-leaderboard/bfcl_eval/model_handler/utils.py index b66144839d..4192db70ee 100644 --- a/berkeley-function-call-leaderboard/bfcl_eval/model_handler/utils.py +++ b/berkeley-function-call-leaderboard/bfcl_eval/model_handler/utils.py @@ -694,13 +694,21 @@ def formulate_system_prompt(format_sensitivity_config: str, functions: list[dict functions=formatted_function_doc, ) - system_prompt = prompt_template.format( + if prompt_format == "chat_completions": + system_prompt = prompt_template.format( persona=persona, task=task, tool_call_format=tool_call_format, - multiturn_behavior=multiturn_behavior, - available_tools=available_tools, + multiturn_behavior=multiturn_behavior ) + else: + system_prompt = prompt_template.format( + persona=persona, + task=task, + tool_call_format=tool_call_format, + multiturn_behavior=multiturn_behavior, + available_tools=available_tools, + ) return system_prompt @@ -902,7 +910,7 @@ def parse_prompt_variation_params(input_str: str) -> tuple[str, bool, str, str, r"ret_fmt=(?Ppython|json|verbose_xml|concise_xml)" r"&tool_call_tag=(?PTrue|False)" r"&func_doc_fmt=(?Ppython|xml|json)" - r"&prompt_fmt=(?Pplaintext|markdown)" + r"&prompt_fmt=(?Pplaintext|markdown|chat_completions)" r"&style=(?Pclassic|experimental)" r"$" ) diff --git a/berkeley-function-call-leaderboard/bfcl_eval/utils.py b/berkeley-function-call-leaderboard/bfcl_eval/utils.py index 976c86c30b..70d4028e62 100644 --- a/berkeley-function-call-leaderboard/bfcl_eval/utils.py +++ b/berkeley-function-call-leaderboard/bfcl_eval/utils.py @@ -9,6 +9,7 @@ from bfcl_eval.constants.default_prompts import ( ADDITIONAL_SYSTEM_PROMPT_FOR_AGENTIC_RESPONSE_FORMAT, DEFAULT_SYSTEM_PROMPT_FORMAT, + CHAT_COMPLETIONS_SYSTEM_PROMPT_FORMAT ) from bfcl_eval.constants.eval_config import * from bfcl_eval.constants.executable_backend_config import ( @@ -57,7 +58,9 @@ def extract_prompt_format_from_id(test_entry_id: str) -> str: """ Extract the prompt format from the test entry ID. """ - if ":" not in test_entry_id: + if test_entry_id == "chat_completions": + return CHAT_COMPLETIONS_SYSTEM_PROMPT_FORMAT + elif ":" not in test_entry_id: return DEFAULT_SYSTEM_PROMPT_FORMAT else: assert ( diff --git a/chat_completions_tools_format.py b/chat_completions_tools_format.py new file mode 100644 index 0000000000..df0fbf8be7 --- /dev/null +++ b/chat_completions_tools_format.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +""" +Convert BFCL function format to OpenAI chat completions tools format. +""" + +from typing import List, Dict, Any +import json + + +def convert_bfcl_to_openai_tools(bfcl_functions: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + Convert BFCL function format to OpenAI chat completions tools format. 
+
+
+def test_convert_bfcl_to_openai_tools():
+    """Test cases for the conversion function."""
+
+    # Test case 1: Triangle area function
+    bfcl_input_1 = [
+        {
+            'description': 'Calculate the area of a triangle given its base and height. '
+                           'Note that the provided function is in Python 3 syntax.',
+            'name': 'calculate_triangle_area',
+            'parameters': {
+                'properties': {
+                    'base': {
+                        'description': 'The base of the triangle.',
+                        'type': 'integer'
+                    },
+                    'height': {
+                        'description': 'The height of the triangle.',
+                        'type': 'integer'
+                    },
+                    'unit': {
+                        'description': 'The unit of measure (defaults to \'units\' if not specified)',
+                        'type': 'string'
+                    }
+                },
+                'required': ['base', 'height'],
+                'type': 'dict'
+            }
+        }
+    ]
+
+    expected_output_1 = [
+        {
+            "type": "function",
+            "function": {
+                "name": "calculate_triangle_area",
+                "description": "Calculate the area of a triangle given its base and height. "
+                               "Note that the provided function is in Python 3 syntax.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "base": {
+                            "description": "The base of the triangle.",
+                            "type": "integer"
+                        },
+                        "height": {
+                            "description": "The height of the triangle.",
+                            "type": "integer"
+                        },
+                        "unit": {
+                            "description": "The unit of measure (defaults to 'units' if not specified)",
+                            "type": "string"
+                        }
+                    },
+                    "required": ["base", "height"]
+                }
+            }
+        }
+    ]
+
+    result_1 = convert_bfcl_to_openai_tools(bfcl_input_1)
+    assert result_1 == expected_output_1, f"Test 1 failed. Got: {json.dumps(result_1, indent=2)}"
+    print("✅ Test 1 (Triangle area function) passed")
+
+    # Test case 2: Multiple functions
+    bfcl_input_2 = [
+        {
+            'name': 'get_weather',
+            'description': 'Get current weather information for a location',
+            'parameters': {
+                'type': 'dict',
+                'properties': {
+                    'location': {'type': 'string', 'description': 'City name'},
+                    'units': {'type': 'string', 'description': 'Temperature units', 'enum': ['celsius', 'fahrenheit']}
+                },
+                'required': ['location']
+            }
+        },
+        {
+            'name': 'calculate_distance',
+            'description': 'Calculate distance between two points',
+            'parameters': {
+                'type': 'dict',
+                'properties': {
+                    'point_a': {'type': 'object', 'description': 'First point coordinates'},
+                    'point_b': {'type': 'object', 'description': 'Second point coordinates'}
+                },
+                'required': ['point_a', 'point_b']
+            }
+        }
+    ]
+
+    expected_output_2 = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get current weather information for a location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {"type": "string", "description": "City name"},
+                        "units": {"type": "string", "description": "Temperature units", "enum": ["celsius", "fahrenheit"]}
+                    },
+                    "required": ["location"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "calculate_distance",
+                "description": "Calculate distance between two points",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "point_a": {"type": "object", "description": "First point coordinates"},
+                        "point_b": {"type": "object", "description": "Second point coordinates"}
+                    },
+                    "required": ["point_a", "point_b"]
+                }
+            }
+        }
+    ]
+
+    result_2 = convert_bfcl_to_openai_tools(bfcl_input_2)
+    assert result_2 == expected_output_2, f"Test 2 failed. Got: {json.dumps(result_2, indent=2)}"
+    print("✅ Test 2 (Multiple functions) passed")
+
+    # Test case 3: Function without parameters
+    bfcl_input_3 = [
+        {
+            'name': 'get_current_time',
+            'description': 'Get the current system time',
+            'parameters': {
+                'type': 'dict',
+                'properties': {},
+                'required': []
+            }
+        }
+    ]
+
+    expected_output_3 = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_time",
+                "description": "Get the current system time",
+                "parameters": {
+                    "type": "object",
+                    "properties": {},
+                    "required": []
+                }
+            }
+        }
+    ]
+
+    result_3 = convert_bfcl_to_openai_tools(bfcl_input_3)
+    assert result_3 == expected_output_3, f"Test 3 failed. Got: {json.dumps(result_3, indent=2)}"
+    print("✅ Test 3 (Function without parameters) passed")
+
+    # Test case 4: Empty input
+    result_4 = convert_bfcl_to_openai_tools([])
+    assert result_4 == [], "Test 4 failed. Expected empty list"
+    print("✅ Test 4 (Empty input) passed")
+
+    # Test case 5: Function with already correct 'object' type (edge case)
+    bfcl_input_5 = [
+        {
+            'name': 'test_function',
+            'description': 'A test function',
+            'parameters': {
+                'type': 'object',  # Already correct
+                'properties': {'param1': {'type': 'string'}},
+                'required': ['param1']
+            }
+        }
+    ]
+
+    expected_output_5 = [
+        {
+            "type": "function",
+            "function": {
+                "name": "test_function",
+                "description": "A test function",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"param1": {"type": "string"}},
+                    "required": ["param1"]
+                }
+            }
+        }
+    ]
+
+    result_5 = convert_bfcl_to_openai_tools(bfcl_input_5)
+    assert result_5 == expected_output_5, f"Test 5 failed. Got: {json.dumps(result_5, indent=2)}"
+    print("✅ Test 5 (Function with correct 'object' type) passed")
+
+    print("\n🎉 All tests passed successfully!")
+
+
+def main():
+    """Main function to demonstrate usage and run tests."""
+    print("BFCL to OpenAI Tools Format Converter")
+    print("=" * 40)
+
+    # Example usage
+    example_bfcl = [
+        {
+            'description': 'Calculate the area of a triangle given its base and height. '
+                           'Note that the provided function is in Python 3 syntax.',
+            'name': 'calculate_triangle_area',
+            'parameters': {
+                'properties': {
+                    'base': {'description': 'The base of the triangle.', 'type': 'integer'},
+                    'height': {'description': 'The height of the triangle.', 'type': 'integer'},
+                    'unit': {'description': 'The unit of measure (defaults to \'units\' if not specified)', 'type': 'string'}
+                },
+                'required': ['base', 'height'],
+                'type': 'dict'
+            }
+        }
+    ]
+
+    print("\nExample Input (BFCL format):")
+    print(json.dumps(example_bfcl, indent=2))
+
+    converted = convert_bfcl_to_openai_tools(example_bfcl)
+
+    print("\nExample Output (OpenAI tools format):")
+    print(json.dumps(converted, indent=2))
+
+    print("\nRunning tests...")
+    print("-" * 20)
+    test_convert_bfcl_to_openai_tools()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file