Add JinaChat to the leaderboards #117

Merged
merged 3 commits on Aug 9, 2023
4,832 changes: 4,832 additions & 0 deletions results/jinachat/model_outputs.json

Large diffs are not rendered by default.
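For reference, each record in results/jinachat/model_outputs.json follows alpaca_eval's standard output schema. The sketch below shows a representative entry; the field values are illustrative, not copied from the file, and any metadata fields beyond these three are an assumption.

# Illustrative sketch of one record in results/jinachat/model_outputs.json.
# Values are made up; the real file holds one such record per eval instruction.
{
    "instruction": "What are the names of some famous actors that started their careers on Broadway?",
    "output": "Several well-known actors began their careers on Broadway, including ...",
    "generator": "jinachat",
}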

6 changes: 6 additions & 0 deletions src/alpaca_eval/decoders/__init__.py
@@ -67,5 +67,11 @@ def get_fn_completions(name: Union[str, Callable]) -> Callable:

        return replicate_completions

    elif name == "jina_chat_completions":
        from .jinachat import jina_chat_completions

        return jina_chat_completions

    else:
        raise ValueError(f"Unknown decoder: {name}")
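With this branch in place, the new decoder resolves by name through the same registry as the other backends. A minimal usage sketch, assuming alpaca_eval is installed and JINA_CHAT_API_KEY is exported; the prompt text is illustrative:

# Resolve the decoder registered above and run it on a single ChatML prompt.
from alpaca_eval.decoders import get_fn_completions

fn_completions = get_fn_completions("jina_chat_completions")
result = fn_completions(prompts=["<|im_start|>user\nName three Broadway musicals.\n<|im_end|>"])
print(result["completions"][0])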
84 changes: 84 additions & 0 deletions src/alpaca_eval/decoders/jinachat.py
@@ -0,0 +1,84 @@
import logging
import multiprocessing
from functools import partial
from typing import Sequence, Optional
import requests
import json
import os
import time
from .openai import _prompt_to_chatml
from .. import utils

__all__ = ["jina_chat_completions"]


def jina_chat_completions(
    prompts: Sequence[str],
    num_procs: Optional[int] = 4,
) -> dict[str, list]:
    """Get JinaChat completions for the given prompts.

    Parameters
    ----------
    prompts : list of str
        Prompts to get completions for.
    num_procs : int, optional
        Number of parallel processes to use for decoding.
    """
    n_examples = len(prompts)
    api_key = os.environ.get("JINA_CHAT_API_KEY")

    if n_examples == 0:
        logging.info("No samples to annotate.")
        return {}
    else:
        logging.info(f"Using `jina_chat_completions` on {n_examples} prompts.")

    prompts = [_prompt_to_chatml(prompt.strip()) for prompt in prompts]
    num_processes = min(multiprocessing.cpu_count(), num_procs)
    with utils.Timer() as t:
        with multiprocessing.Pool(processes=num_processes) as pool:
            logging.info(f"Number of processes: {num_processes}")
            get_chat_completion_with_key = partial(_get_chat_completion, api_key)
            completions_and_num_tokens = pool.map(get_chat_completion_with_key, prompts)

    completions = [text for text, _ in completions_and_num_tokens]
    num_tokens = [tokens for _, tokens in completions_and_num_tokens]

    logging.info(f"Completed {n_examples} examples in {t}.")

    # refer to https://chat.jina.ai/billing
    price_per_example = [0.08 if msg_tokens > 300 else 0 for msg_tokens in num_tokens]
    avg_time = [t.duration / n_examples] * len(completions)

    return dict(completions=completions, price_per_example=price_per_example, time_per_example=avg_time)


def _get_chat_completion(api_key, prompt):
    url = "https://api.chat.jina.ai/v1/chat/completions"
    headers = {
        "authorization": f"Bearer {api_key}",
        "content-type": "application/json",
    }
    json_payload = {"messages": prompt}

    max_retries = 10

    for attempt in range(max_retries):
        try:
            response = requests.post(url, headers=headers, json=json_payload)
            response.raise_for_status()  # raises an HTTPError for 4xx/5xx responses
            data = response.json()
            message = data["choices"][0]["message"]["content"]
            message_tokens = data["usage"]["completion_tokens"]
            return message, message_tokens
        except (json.JSONDecodeError, requests.exceptions.HTTPError) as e:
            logging.warning(f"Error occurred: {e}. Attempt {attempt + 1} of {max_retries}.")
            if attempt + 1 == max_retries:
                logging.exception("Max retries reached. Raising exception.")
                # Log the payload but not the headers, which contain the API key.
                logging.exception(f"Request data -> URL: {url}, JSON Payload: {json_payload}")
                raise
            time.sleep(5)
        except Exception as e:
            logging.exception(f"An unexpected error occurred: {e}")
            raise
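Called directly, the decoder returns the dictionary shape alpaca_eval expects from every completions function. A hedged usage sketch; the prompt text is illustrative and JINA_CHAT_API_KEY must be set:

# Usage sketch for jina_chat_completions defined above.
from alpaca_eval.decoders.jinachat import jina_chat_completions

prompt = (
    "<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n"
    "<|im_start|>user\nSummarize the plot of Hamlet in two sentences.\n<|im_end|>"
)
out = jina_chat_completions(prompts=[prompt], num_procs=1)
# out == {"completions": [...], "price_per_example": [...], "time_per_example": [...]}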
@@ -21,6 +21,7 @@ openchat8192-13b,79.53980099502488,1.4222439886269744,639,164,1,804,community,16
opencoderplus-15b,78.69565217391305,1.440029529188432,632,170,3,805,community,1628
vicuna-7b-v1.3,76.8414481897628,1.487520320531845,614,184,3,801,verified,1110
wizardlm-13b,75.31094527363184,1.5101858292160824,601,194,9,804,minimal,985
jinachat,74.12718204488779,1.541070307435577,592,205,5,802,community,676
airoboros-65b,73.91304347826086,1.5285333061227804,587,202,16,805,community,1512
airoboros-33b,73.29192546583852,1.55290318216736,587,212,6,805,community,1514
guanaco-65b,71.80124223602485,1.586912361158523,578,227,0,805,minimal,1249
@@ -5,6 +5,7 @@ llama-2-70b-chat-hf,72.91925465838509,1.5622590981080728,minimal,4,805,585,216,,
vicuna-33b-v1.3,72.36024844720497,1.5710737760483915,verified,5,805,580,220,,1479
claude-2,71.98757763975155,1.5824915958976835,minimal,1,805,579,225,1069.0,1069
chatgpt,67.70186335403726,1.642111587090117,minimal,6,805,542,257,,811
jinachat,66.64596273291924,1.65695577964565,community,5,805,534,266,,676
vicuna-13b-v1.3,66.2111801242236,1.6657907370589309,verified,2,805,532,271,,1132
wizardlm-13b,66.14906832298136,1.6584088766540706,minimal,9,805,528,268,,985
vicuna-13b,63.22981366459627,1.698243477332765,minimal,2,805,508,295,,1037
5 changes: 5 additions & 0 deletions src/alpaca_eval/models_configs/jina-chat/configs.yaml
@@ -0,0 +1,5 @@
jinachat:
prompt_template: "jina-chat/prompt.txt"
fn_completions: "jina_chat_completions"
completions_kwargs: {}
pretty_name: "JinaChat"
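Once this config is on the path, the model can be exercised end to end. A sketch using alpaca_eval's Python entry point; treat the exact signature as an assumption (the `alpaca_eval evaluate_from_model` CLI is the equivalent):

# Hedged sketch: generate JinaChat outputs and score them against the baseline.
# Exact keyword arguments may differ across alpaca_eval versions.
from alpaca_eval import main

main.evaluate_from_model(model_configs="jinachat")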
7 changes: 7 additions & 0 deletions src/alpaca_eval/models_configs/jina-chat/prompt.txt
@@ -0,0 +1,7 @@
<|im_start|>system
You are a helpful, respectful, and honest assistant. Always answer as helpfully and thoroughly as possible, while being safe.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<|im_end|>
<|im_start|>user
{instruction}
<|im_end|>
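For reference, _prompt_to_chatml (imported in jinachat.py above) turns this rendered template into the messages list sent in the request body. Roughly, the structure is as follows; shown for illustration, see alpaca_eval.decoders.openai for the actual parser:

# Approximate result of _prompt_to_chatml on the rendered template above.
[
    {"role": "system", "content": "You are a helpful, respectful, and honest assistant. ..."},
    {"role": "user", "content": "{instruction}"},
]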