Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump tqdm from 4.65.0 to 4.66.3 in /inference #2

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
Open
15 changes: 5 additions & 10 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,15 @@ goex/docker/misc/images.json

################## Berkeley Function Call Leaderboard ##########################

# Ignore tree-sitter
berkeley-function-call-leaderboard/eval_checker/tree-sitter-java
berkeley-function-call-leaderboard/eval_checker/tree-sitter-javascript
berkeley-function-call-leaderboard/tree-sitter-java
berkeley-function-call-leaderboard/tree-sitter-javascript

# Ignore aggregated eval data (used for OSS models)
berkeley-function-call-leaderboard/eval_data_total.json

# Ignore inference results
berkeley-function-call-leaderboard/result/

# Ignore leaderboard score
berkeley-function-call-leaderboard/score/

# Ignore environment variables
berkeley-function-call-leaderboard/.env
!berkeley-function-call-leaderboard/.env.example

.direnv/
.venv
.venv
4 changes: 4 additions & 0 deletions berkeley-function-call-leaderboard/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ ANTHROPIC_API_KEY=
NVIDIA_API_KEY=nvapi-XXXXXX
YI_API_KEY=

# We use Vertex AI to run inference on Google Gemini models
VERTEX_AI_PROJECT_ID=
VERTEX_AI_LOCATION=

COHERE_API_KEY=
USE_COHERE_OPTIMIZATION=False # True/False

Expand Down
124 changes: 124 additions & 0 deletions berkeley-function-call-leaderboard/CHANGELOG.md

Large diffs are not rendered by default.

305 changes: 112 additions & 193 deletions berkeley-function-call-leaderboard/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import argparse
import os
from bfcl.eval_checker.custom_exception import NoAPIKeyError
from bfcl.eval_checker.executable_eval.custom_exception import NoAPIKeyError
from dotenv import load_dotenv

parser = argparse.ArgumentParser(description="Replace placeholders in the function credential config file.")
Expand Down
166 changes: 166 additions & 0 deletions berkeley-function-call-leaderboard/bfcl/constant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# Prefix shared by every dataset file name listed in TEST_FILE_MAPPING.
VERSION_PREFIX = "BFCL_v3"

# Maps each test category name to its dataset file name. Every file name has
# the form "<VERSION_PREFIX>_<category>.json", so the mapping is derived from
# the ordered category names below (insertion order follows this tuple).
TEST_FILE_MAPPING = {
    category: f"{VERSION_PREFIX}_{category}.json"
    for category in (
        "exec_simple",
        "exec_parallel",
        "exec_multiple",
        "exec_parallel_multiple",
        "simple",
        "irrelevance",
        "parallel",
        "multiple",
        "parallel_multiple",
        "java",
        "javascript",
        "rest",
        "sql",
        "chatable",
        # Live Datasets
        "live_simple",
        "live_multiple",
        "live_parallel",
        "live_parallel_multiple",
        "live_irrelevance",
        "live_relevance",
        # Multi-turn Datasets
        "multi_turn_base",
        "multi_turn_miss_func",
        "multi_turn_miss_param",
        "multi_turn_long_context",
        "multi_turn_composite",
    )
}

def _build_test_collections():
    """Assemble the collection-name -> test-category-list mapping.

    The collections overlap heavily, so each one is composed from a handful
    of ordered category groups. The groups are tuples and every collection is
    unpacked into a brand-new list, so no two collections share a list object.
    """
    exec_group = (
        "exec_simple",
        "exec_parallel",
        "exec_multiple",
        "exec_parallel_multiple",
    )
    python_ast_group = (
        "simple",
        "irrelevance",
        "parallel",
        "multiple",
        "parallel_multiple",
    )
    non_python_group = ("java", "javascript")
    rest_group = ("rest",)
    live_group = (
        "live_simple",
        "live_multiple",
        "live_parallel",
        "live_parallel_multiple",
        "live_irrelevance",
        "live_relevance",
    )
    multi_turn_group = (
        "multi_turn_base",
        "multi_turn_miss_func",
        "multi_turn_miss_param",
        "multi_turn_long_context",
        "multi_turn_composite",
    )

    # Larger collections are the smaller ones extended in order.
    non_live_group = exec_group + python_ast_group + non_python_group + rest_group
    single_turn_group = non_live_group + live_group

    return {
        "all": [*single_turn_group, *multi_turn_group],
        "multi_turn": [*multi_turn_group],
        "single_turn": [*single_turn_group],
        "live": [*live_group],
        "non_live": [*non_live_group],
        # TODO: Update this mapping
        "ast": [*python_ast_group, *non_python_group, *live_group],
        "executable": [*exec_group, *rest_group],
        "non_python": [*non_python_group],
        "python": [*exec_group, *python_ast_group, *rest_group, *live_group],
        "python_ast": [*python_ast_group, *live_group],
    }


# Maps a test collection name to the ordered list of test categories in it.
TEST_COLLECTION_MAPPING = _build_test_collections()
Loading