ShishirPatil · ShishirPatil · Apr 5, 2024 · Apr 5, 2024 · Apr 5, 2024 · Apr 5, 2024
diff --git a/berkeley-function-call-leaderboard/eval_checker/eval_runner.py b/berkeley-function-call-leaderboard/eval_checker/eval_runner.py
@@ -268,14 +268,27 @@ def runner(model_names, test_categories):
     # Traverse each subdirectory
     for subdir in subdirs:
 
-        # Pattern to match JSON files in this subdirectory
-        json_files_pattern = os.path.join(subdir, "*.json")
-
         model_name = subdir.split(INPUT_PATH)[1]
         if model_names is not None and model_name not in model_names:
             continue
 
         model_name_escaped = model_name.replace("_", "/")
+
+        files = [
+            f
+            for f in os.listdir(subdir)
+            if os.path.isfile(os.path.join(subdir, f)) and not f.startswith(".")
+        ]  
+        # If the only non-hidden file is 'result.json', this is an OSS model's combined
+        # result file: split it into per-category files before globbing for results below.
+        if len(files) == 1 and files[0] == "result.json":
+            result_json_file_path = os.path.join(subdir, "result.json")
+            oss_file_formatter(result_json_file_path, subdir)
+            print(f"Detected OSS model: {model_name}. result.json has been split into individual test category files.")
+
+        # Pattern to match JSON files in this subdirectory
+        json_files_pattern = os.path.join(subdir, "*.json")
+
         # Find and process all JSON files in the subdirectory
         for model_result_json in glob.glob(json_files_pattern):
 

diff --git a/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py b/berkeley-function-call-leaderboard/eval_checker/eval_runner_helper.py
@@ -333,6 +333,24 @@
 )  # Price got from AZure, 10.879 per hour for 8 A100, 3 years reserved
 
 
+FILENAME_INDEX_MAPPING = {  # test category -> (first, last) inclusive row indices in an OSS result.json
+    "executable_parallel_function": (0, 49),
+    "parallel_multiple_function": (50, 249),
+    "executable_simple": (250, 349),
+    "rest": (350, 419),
+    "sql": (420, 519),
+    "parallel_function": (520, 719),
+    "chatable": (720, 919),
+    "java": (920, 1019),
+    "javascript": (1020, 1069),
+    "executable_multiple_function": (1070, 1119),
+    "simple": (1120, 1519),
+    "relevance": (1520, 1759),
+    "executable_parallel_multiple_function": (1760, 1799),
+    "multiple_function": (1800, 1999),
+}  # ranges are contiguous and cover rows 0-1999; same category order as test_files in eval_data_compilation.py
+
+
 def extract_after_test(input_string):
     parts = input_string.split("_test_")[1].split("_result")[0].split(".json")[0]
     return parts
@@ -697,6 +715,23 @@ def update_leaderboard_table_with_score_file(leaderboard_table, score_path):
                 }
 
 
+def oss_file_formatter(input_file_path, output_dir):
+    """Split a combined OSS result.json into one result file per test category.
+
+    Each FILENAME_INDEX_MAPPING entry yields a JSON-lines file in ``output_dir``
+    whose ids restart at 0. Raises ValueError unless there are 2000 entries.
+    """
+    data = load_file(input_file_path)
+    if len(data) != 2000:  # explicit raise: an assert is stripped by `python -O`
+        raise ValueError("OSS result.json file should have 2000 entries.")
+
+    for key, (start, end) in FILENAME_INDEX_MAPPING.items():
+        name = f"gorilla_openfunctions_v1_test_{key}_result.json"
+        with open(os.path.join(output_dir, name), "w") as f:
+            for idx, row in enumerate(range(start, end + 1)):  # `end` is inclusive
+                f.write(json.dumps({"id": idx, "result": data[row]["text"]}) + "\n")
+
+
 def collapse_json_objects(file_path):
     with open(file_path, "r") as file:
         content = file.read()

diff --git a/berkeley-function-call-leaderboard/eval_data_compilation.py b/berkeley-function-call-leaderboard/eval_data_compilation.py
@@ -1,19 +1,37 @@
-import os
 import json
+
 data = []
 """
     Compile evaluation data into a single file
 """
-for filename in os.listdir("./data"):
-    if "gorilla" in filename:
-        with open(f"./data/{filename}", "r") as file:
-            for line in file:
-                item = json.loads(line)
-                name = filename.replace("gorilla_openfunctions_v1_test_","").replace(".json","")
-                item["question_type"] = name
-                data.append(item)
-with open("./eval_data_total.json", "a+") as file:
+
+test_files = [  # same category order as FILENAME_INDEX_MAPPING in eval_checker/eval_runner_helper.py
+    "executable_parallel_function",
+    "parallel_multiple_function",
+    "executable_simple",
+    "rest",
+    "sql",
+    "parallel_function",
+    "chatable",
+    "java",
+    "javascript",
+    "executable_multiple_function",
+    "simple",
+    "relevance",
+    "executable_parallel_multiple_function",
+    "multiple_function",
+]
+
+for test_name in test_files:
+    data_path = f"./data/gorilla_openfunctions_v1_test_{test_name}.json"
+    # JSON is UTF-8 by spec; don't rely on the platform's default encoding.
+    with open(data_path, "r", encoding="utf-8") as file:
+        for line in file:  # one JSON object per line, tagged with its category
+            data.append({**json.loads(line), "question_type": test_name})
+
+with open("./eval_data_total.json", "w") as file:
     for item in data:
         file.write(json.dumps(item))
         file.write("\n")
-print("Data successfully compiled into eval_data_total.json 🦍")
+
+print("Data successfully compiled into eval_data_total.json 🦍")