@@ -42,14 +42,21 @@
)
_MARKDOWN_SYSTEM_PROMPT_TEMPLATE = "{persona}\n\n## Task\n{task}\n\n## Tool Call Format\n{tool_call_format}\n\n## Multi-turn Behavior\n{multiturn_behavior}\n\n## Available Tools\n{available_tools}"

_CHAT_COMPLETIONS_SYSTEM_PROMPT_TEMPLATE = (
    "{persona}{task}\n\n{tool_call_format}\n\n{multiturn_behavior}"
)

PROMPT_TEMPLATE_MAPPING = {
    "plaintext": _PLAINTEXT_SYSTEM_PROMPT_TEMPLATE,
    "markdown": _MARKDOWN_SYSTEM_PROMPT_TEMPLATE,
    "chat_completions": _CHAT_COMPLETIONS_SYSTEM_PROMPT_TEMPLATE,
}

# This is the default system prompt format
DEFAULT_SYSTEM_PROMPT_FORMAT = "ret_fmt=python&tool_call_tag=False&func_doc_fmt=json&prompt_fmt=plaintext&style=classic"

CHAT_COMPLETIONS_SYSTEM_PROMPT_FORMAT = "ret_fmt=python&tool_call_tag=False&func_doc_fmt=json&prompt_fmt=chat_completions&style=classic"

# NOT USED, just for reference
# This is the prompt template for the default system prompt format
_DEFAULT_SYSTEM_PROMPT = """You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose.
@@ -86,14 +86,22 @@ def _query_FC(self, inference_data: dict):
            "model": self.model_name,
            "temperature": self.temperature,
            "store": False,
            "max_completion_tokens": 8192,
        }

        if len(tools) > 0:
            kwargs["tools"] = tools
            kwargs["tool_choice"] = "auto"

        return self.generate_with_backoff(**kwargs)

    def _pre_query_processing_FC(self, inference_data: dict, test_entry: dict) -> dict:
        # SID_DEBUG: hard-code the chat_completions prompt-format id for every entry;
        # verify this works for all test_entry ids against the dataset in bfcl.
        functions: list = test_entry["function"]
        test_entry_id: str = "chat_completions"
        test_entry["question"][0] = system_prompt_pre_processing_chat_model(
            test_entry["question"][0], functions, test_entry_id
        )
        inference_data["message"] = []
        return inference_data
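To see where these kwargs end up, here is a minimal sketch of the request the FC path now issues, assuming `generate_with_backoff` forwards its kwargs to the OpenAI SDK's `chat.completions.create` and that the root-level `chat_completions_tools_format.py` added in this PR is importable. The client setup, model name, message, and example BFCL function are illustrative and not taken from this diff:

```python
from openai import OpenAI

from chat_completions_tools_format import convert_bfcl_to_openai_tools

client = OpenAI()  # assumes OPENAI_API_KEY is set; the handler manages its own client

bfcl_functions = [{
    "name": "get_weather",
    "description": "Get current weather information for a location",
    "parameters": {
        "type": "dict",
        "properties": {"location": {"type": "string", "description": "City name"}},
        "required": ["location"],
    },
}]

response = client.chat.completions.create(
    model="gpt-4o",  # illustrative; the handler passes self.model_name
    messages=[{"role": "user", "content": "What's the weather in Berlin?"}],
    temperature=0.001,  # illustrative; the handler passes self.temperature
    store=False,
    max_completion_tokens=8192,
    tools=convert_bfcl_to_openai_tools(bfcl_functions),
    tool_choice="auto",
)
print(response.choices[0].message.tool_calls)
```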

@@ -223,6 +231,7 @@ def _query_prompting(self, inference_data: dict):
            model=self.model_name,
            temperature=self.temperature,
            store=False,
            max_tokens=8192,
        )

    def _pre_query_processing_prompting(self, test_entry: dict) -> dict:
@@ -694,13 +694,21 @@ def formulate_system_prompt(format_sensitivity_config: str, functions: list[dict
        functions=formatted_function_doc,
    )

    if prompt_format == "chat_completions":
        system_prompt = prompt_template.format(
            persona=persona,
            task=task,
            tool_call_format=tool_call_format,
            multiturn_behavior=multiturn_behavior,
        )
    else:
        system_prompt = prompt_template.format(
            persona=persona,
            task=task,
            tool_call_format=tool_call_format,
            multiturn_behavior=multiturn_behavior,
            available_tools=available_tools,
        )

    return system_prompt
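A note on the branch above: `str.format` silently ignores keyword arguments that have no matching placeholder, so the original single call would also render the chat_completions template correctly; the explicit branch mainly documents the intent. A quick demonstration:

```python
tmpl = "{persona}{task}\n\n{tool_call_format}\n\n{multiturn_behavior}"
rendered = tmpl.format(
    persona="P", task="T", tool_call_format="F",
    multiturn_behavior="M",
    available_tools="ignored",  # extra kwarg with no placeholder is silently dropped
)
assert "ignored" not in rendered
```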

@@ -902,7 +910,7 @@ def parse_prompt_variation_params(input_str: str) -> tuple[str, bool, str, str,
r"ret_fmt=(?P<return_format>python|json|verbose_xml|concise_xml)"
r"&tool_call_tag=(?P<has_tool_call_tag>True|False)"
r"&func_doc_fmt=(?P<function_doc_format>python|xml|json)"
r"&prompt_fmt=(?P<prompt_format>plaintext|markdown)"
r"&prompt_fmt=(?P<prompt_format>plaintext|markdown|chat_completions)"
r"&style=(?P<prompt_style>classic|experimental)"
r"$"
)
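To confirm the widened alternation accepts the new config string, here is the pattern applied to `CHAT_COMPLETIONS_SYSTEM_PROMPT_FORMAT`; the `re.compile` wrapper and the `^` anchor are assumed from the surrounding code, which is not shown in this hunk:

```python
import re

pattern = re.compile(
    r"^ret_fmt=(?P<return_format>python|json|verbose_xml|concise_xml)"
    r"&tool_call_tag=(?P<has_tool_call_tag>True|False)"
    r"&func_doc_fmt=(?P<function_doc_format>python|xml|json)"
    r"&prompt_fmt=(?P<prompt_format>plaintext|markdown|chat_completions)"
    r"&style=(?P<prompt_style>classic|experimental)$"
)

config = "ret_fmt=python&tool_call_tag=False&func_doc_fmt=json&prompt_fmt=chat_completions&style=classic"
match = pattern.match(config)
assert match is not None
assert match.group("prompt_format") == "chat_completions"
```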
berkeley-function-call-leaderboard/bfcl_eval/utils.py (5 changes: 4 additions & 1 deletion)
@@ -9,6 +9,7 @@
from bfcl_eval.constants.default_prompts import (
    ADDITIONAL_SYSTEM_PROMPT_FOR_AGENTIC_RESPONSE_FORMAT,
    DEFAULT_SYSTEM_PROMPT_FORMAT,
    CHAT_COMPLETIONS_SYSTEM_PROMPT_FORMAT,
)
from bfcl_eval.constants.eval_config import *
from bfcl_eval.constants.executable_backend_config import (
@@ -57,7 +58,9 @@ def extract_prompt_format_from_id(test_entry_id: str) -> str:
    """
    Extract the prompt format from the test entry ID.
    """
    if test_entry_id == "chat_completions":
        return CHAT_COMPLETIONS_SYSTEM_PROMPT_FORMAT
    elif ":" not in test_entry_id:
        return DEFAULT_SYSTEM_PROMPT_FORMAT
    else:
        assert (
chat_completions_tools_format.py (new file: 278 additions, 0 deletions)
@@ -0,0 +1,278 @@
#!/usr/bin/env python3
"""
Convert BFCL function format to OpenAI chat completions tools format.
"""

from typing import List, Dict, Any
import json


def convert_bfcl_to_openai_tools(bfcl_functions: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Convert BFCL function format to OpenAI chat completions tools format.

Args:
bfcl_functions: List of functions in BFCL format

Returns:
List of functions in OpenAI tools format
"""
tools = []

for func in bfcl_functions:
# Extract parameters, ensuring type is 'object' instead of 'dict'
parameters = func.get('parameters', {}).copy()
if parameters.get('type') == 'dict':
parameters['type'] = 'object'

tool = {
"type": "function",
"function": {
"name": func['name'],
"description": func['description'],
"parameters": parameters
}
}
tools.append(tool)

return tools


def test_convert_bfcl_to_openai_tools():
"""Test cases for the conversion function."""

# Test case 1: Triangle area function
bfcl_input_1 = [
{
'description': 'Calculate the area of a triangle given its base and height. '
'Note that the provided function is in Python 3 syntax.',
'name': 'calculate_triangle_area',
'parameters': {
'properties': {
'base': {
'description': 'The base of the triangle.',
'type': 'integer'
},
'height': {
'description': 'The height of the triangle.',
'type': 'integer'
},
'unit': {
'description': 'The unit of measure (defaults to \'units\' if not specified)',
'type': 'string'
}
},
'required': ['base', 'height'],
'type': 'dict'
}
}
]

expected_output_1 = [
{
"type": "function",
"function": {
"name": "calculate_triangle_area",
"description": "Calculate the area of a triangle given its base and height. "
"Note that the provided function is in Python 3 syntax.",
"parameters": {
"type": "object",
"properties": {
"base": {
"description": "The base of the triangle.",
"type": "integer"
},
"height": {
"description": "The height of the triangle.",
"type": "integer"
},
"unit": {
"description": "The unit of measure (defaults to 'units' if not specified)",
"type": "string"
}
},
"required": ["base", "height"]
}
}
}
]

result_1 = convert_bfcl_to_openai_tools(bfcl_input_1)
assert result_1 == expected_output_1, f"Test 1 failed. Got: {json.dumps(result_1, indent=2)}"
print("✅ Test 1 (Triangle area function) passed")

# Test case 2: Multiple functions
bfcl_input_2 = [
{
'name': 'get_weather',
'description': 'Get current weather information for a location',
'parameters': {
'type': 'dict',
'properties': {
'location': {'type': 'string', 'description': 'City name'},
'units': {'type': 'string', 'description': 'Temperature units', 'enum': ['celsius', 'fahrenheit']}
},
'required': ['location']
}
},
{
'name': 'calculate_distance',
'description': 'Calculate distance between two points',
'parameters': {
'type': 'dict',
'properties': {
'point_a': {'type': 'object', 'description': 'First point coordinates'},
'point_b': {'type': 'object', 'description': 'Second point coordinates'}
},
'required': ['point_a', 'point_b']
}
}
]

expected_output_2 = [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather information for a location",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string", "description": "City name"},
"units": {"type": "string", "description": "Temperature units", "enum": ["celsius", "fahrenheit"]}
},
"required": ["location"]
}
}
},
{
"type": "function",
"function": {
"name": "calculate_distance",
"description": "Calculate distance between two points",
"parameters": {
"type": "object",
"properties": {
"point_a": {"type": "object", "description": "First point coordinates"},
"point_b": {"type": "object", "description": "Second point coordinates"}
},
"required": ["point_a", "point_b"]
}
}
}
]

result_2 = convert_bfcl_to_openai_tools(bfcl_input_2)
assert result_2 == expected_output_2, f"Test 2 failed. Got: {json.dumps(result_2, indent=2)}"
print("✅ Test 2 (Multiple functions) passed")

# Test case 3: Function without parameters
bfcl_input_3 = [
{
'name': 'get_current_time',
'description': 'Get the current system time',
'parameters': {
'type': 'dict',
'properties': {},
'required': []
}
}
]

expected_output_3 = [
{
"type": "function",
"function": {
"name": "get_current_time",
"description": "Get the current system time",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
}
]

result_3 = convert_bfcl_to_openai_tools(bfcl_input_3)
assert result_3 == expected_output_3, f"Test 3 failed. Got: {json.dumps(result_3, indent=2)}"
print("✅ Test 3 (Function without parameters) passed")

# Test case 4: Empty input
result_4 = convert_bfcl_to_openai_tools([])
assert result_4 == [], "Test 4 failed. Expected empty list"
print("✅ Test 4 (Empty input) passed")

# Test case 5: Function with already correct 'object' type (edge case)
bfcl_input_5 = [
{
'name': 'test_function',
'description': 'A test function',
'parameters': {
'type': 'object', # Already correct
'properties': {'param1': {'type': 'string'}},
'required': ['param1']
}
}
]

expected_output_5 = [
{
"type": "function",
"function": {
"name": "test_function",
"description": "A test function",
"parameters": {
"type": "object",
"properties": {"param1": {"type": "string"}},
"required": ["param1"]
}
}
}
]

result_5 = convert_bfcl_to_openai_tools(bfcl_input_5)
assert result_5 == expected_output_5, f"Test 5 failed. Got: {json.dumps(result_5, indent=2)}"
print("✅ Test 5 (Function with correct 'object' type) passed")

print("\n🎉 All tests passed successfully!")


def main():
"""Main function to demonstrate usage and run tests."""
print("BFCL to OpenAI Tools Format Converter")
print("=" * 40)

# Example usage
example_bfcl = [
{
'description': 'Calculate the area of a triangle given its base and height. '
'Note that the provided function is in Python 3 syntax.',
'name': 'calculate_triangle_area',
'parameters': {
'properties': {
'base': {'description': 'The base of the triangle.', 'type': 'integer'},
'height': {'description': 'The height of the triangle.', 'type': 'integer'},
'unit': {'description': 'The unit of measure (defaults to \'units\' if not specified)', 'type': 'string'}
},
'required': ['base', 'height'],
'type': 'dict'
}
}
]

print("\nExample Input (BFCL format):")
print(json.dumps(example_bfcl, indent=2))

converted = convert_bfcl_to_openai_tools(example_bfcl)

print("\nExample Output (OpenAI tools format):")
print(json.dumps(converted, indent=2))

print("\nRunning tests...")
print("-" * 20)
test_convert_bfcl_to_openai_tools()


if __name__ == "__main__":
    main()
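One behavior of `convert_bfcl_to_openai_tools` worth flagging: only the top-level `type` is rewritten from `'dict'` to `'object'`, so a nested parameter schema that also uses BFCL's `'dict'` type passes through unchanged. The `book_flight` function below is a made-up example, not from the BFCL dataset:

```python
nested = [{
    "name": "book_flight",  # hypothetical function for illustration
    "description": "Book a flight for a passenger",
    "parameters": {
        "type": "dict",
        "properties": {
            "passenger": {
                "type": "dict",  # nested 'dict' is not rewritten by the converter
                "description": "Passenger details",
            }
        },
        "required": ["passenger"],
    },
}]

converted = convert_bfcl_to_openai_tools(nested)
assert converted[0]["function"]["parameters"]["type"] == "object"
assert converted[0]["function"]["parameters"]["properties"]["passenger"]["type"] == "dict"
# A recursive pass would be needed if nested 'dict' schemas occur in the dataset.
```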