Add JinaChat to the leaderboards (#117)
* feat: evaluate jina chat

Signed-off-by: jupyterjazz <saba.sturua@jina.ai>

* chore: minor change

Signed-off-by: jupyterjazz <saba.sturua@jina.ai>

* refactor: apply suggestions

Signed-off-by: jupyterjazz <saba.sturua@jina.ai>

---------

Signed-off-by: jupyterjazz <saba.sturua@jina.ai>
jupyterjazz authored Aug 9, 2023
1 parent 40282f5 commit eda4a40
Showing 7 changed files with 4,936 additions and 0 deletions.
4,832 changes: 4,832 additions & 0 deletions results/jinachat/model_outputs.json

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions src/alpaca_eval/decoders/__init__.py
@@ -67,5 +67,11 @@ def get_fn_completions(name: Union[str, Callable]) -> Callable:

        return replicate_completions

    elif name == "jina_chat_completions":
        from .jinachat import jina_chat_completions

        return jina_chat_completions

    else:
        raise ValueError(f"Unknown decoder: {name}")
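
With this branch in place, the decoder can be resolved by its registered name. A tiny usage sketch (nothing assumed beyond the names visible in the diff above):

from alpaca_eval.decoders import get_fn_completions

# Resolves the registered name to the jina_chat_completions function added in this commit.
fn_completions = get_fn_completions("jina_chat_completions")
print(fn_completions.__name__)  # jina_chat_completions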
84 changes: 84 additions & 0 deletions src/alpaca_eval/decoders/jinachat.py
@@ -0,0 +1,84 @@
import logging
import multiprocessing
from functools import partial
from typing import Sequence, Optional
import requests
import json
import os
import time
from .openai import _prompt_to_chatml
from .. import utils

__all__ = ["jina_chat_completions"]


def jina_chat_completions(
    prompts: Sequence[str],
    num_procs: Optional[int] = 4,
) -> dict[str, list]:
    """Get JinaChat completions for the given prompts.

    Parameters
    ----------
    prompts : list of str
        Prompts to get completions for.

    num_procs : int, optional
        Number of parallel processes to use for decoding.
    """
    n_examples = len(prompts)
    api_key = os.environ.get('JINA_CHAT_API_KEY')

    if n_examples == 0:
        logging.info("No samples to annotate.")
        return {}
    else:
        logging.info(f"Using `jina_chat_completions` on {n_examples} prompts.")

    prompts = [_prompt_to_chatml(prompt.strip()) for prompt in prompts]
    num_processes = min(multiprocessing.cpu_count(), num_procs)
    with utils.Timer() as t:
        with multiprocessing.Pool(processes=num_processes) as pool:
            logging.info(f"Number of processes: {pool._processes}")
            get_chat_completion_with_key = partial(_get_chat_completion, api_key)
            completions_and_num_tokens = pool.map(get_chat_completion_with_key, prompts)

    completions = [text for text, _ in completions_and_num_tokens]
    num_tokens = [tokens for _, tokens in completions_and_num_tokens]

    logging.info(f"Completed {n_examples} examples in {t}.")

    # refer to https://chat.jina.ai/billing for pricing
    price_per_example = [0.08 if msg_tokens > 300 else 0 for msg_tokens in num_tokens]
    avg_time = [t.duration / n_examples] * len(completions)

    return dict(completions=completions, price_per_example=price_per_example, time_per_example=avg_time)


def _get_chat_completion(api_key, prompt):
    url = 'https://api.chat.jina.ai/v1/chat/completions'
    headers = {
        "authorization": f"Bearer {api_key}",
        "content-type": "application/json",
    }
    json_payload = {"messages": prompt}

    max_retries = 10

    for attempt in range(max_retries):
        try:
            response = requests.post(url, headers=headers, json=json_payload)
            response.raise_for_status()  # Will raise an HTTPError if one occurred.
            message = response.json()['choices'][0]['message']['content']
            message_tokens = response.json()['usage']['completion_tokens']
            return message, message_tokens
        except (json.JSONDecodeError, requests.exceptions.HTTPError) as e:
            logging.warning(f"Error occurred: {e}, Attempt {attempt + 1} of {max_retries}")
            time.sleep(5)
            if attempt + 1 == max_retries:
                logging.exception("Max retries reached. Raising exception.")
                logging.exception(f"Request data -> URL: {url}, Headers: {headers}, JSON Payload: {json_payload}")
                raise
        except Exception as e:
            logging.exception(f"An unexpected error occurred: {e}")
            raise
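
For orientation, a minimal usage sketch of the decoder above (not part of the commit): it assumes JINA_CHAT_API_KEY is exported and that the prompt is already in the ChatML layout that `_prompt_to_chatml` expects, i.e. the same layout as the prompt template added below. The instruction text is hypothetical.

import os

from alpaca_eval.decoders.jinachat import jina_chat_completions

# Example prompt in the ChatML layout the decoder parses (hypothetical instruction).
PROMPT = (
    "<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n"
    "<|im_start|>user\nName three prime numbers.\n<|im_end|>"
)

if __name__ == "__main__":
    # The decoder reads the key itself via os.environ.get("JINA_CHAT_API_KEY").
    assert os.environ.get("JINA_CHAT_API_KEY"), "export JINA_CHAT_API_KEY before running"

    out = jina_chat_completions([PROMPT], num_procs=1)
    print(out["completions"][0])        # model answer
    print(out["price_per_example"][0])  # 0.08 if the reply used more than 300 tokens, else 0
    print(out["time_per_example"][0])   # average seconds per example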
@@ -21,6 +21,7 @@
openchat8192-13b,79.53980099502488,1.4222439886269744,639,164,1,804,community,1628
opencoderplus-15b,78.69565217391305,1.440029529188432,632,170,3,805,community,1628
vicuna-7b-v1.3,76.8414481897628,1.487520320531845,614,184,3,801,verified,1110
wizardlm-13b,75.31094527363184,1.5101858292160824,601,194,9,804,minimal,985
jinachat,74.12718204488779,1.541070307435577,592,205,5,802,community,676
airoboros-65b,73.91304347826086,1.5285333061227804,587,202,16,805,community,1512
airoboros-33b,73.29192546583852,1.55290318216736,587,212,6,805,community,1514
guanaco-65b,71.80124223602485,1.586912361158523,578,227,0,805,minimal,1249
@@ -5,6 +5,7 @@
llama-2-70b-chat-hf,72.91925465838509,1.5622590981080728,minimal,4,805,585,216,,1038
vicuna-33b-v1.3,72.36024844720497,1.5710737760483915,verified,5,805,580,220,,1479
claude-2,71.98757763975155,1.5824915958976835,minimal,1,805,579,225,1069.0,1069
chatgpt,67.70186335403726,1.642111587090117,minimal,6,805,542,257,,811
jinachat,66.64596273291924,1.65695577964565,community,5,805,534,266,,676
vicuna-13b-v1.3,66.2111801242236,1.6657907370589309,verified,2,805,532,271,,1132
wizardlm-13b,66.14906832298136,1.6584088766540706,minimal,9,805,528,268,,985
vicuna-13b,63.22981366459627,1.698243477332765,minimal,2,805,508,295,,1037
5 changes: 5 additions & 0 deletions src/alpaca_eval/models_configs/jina-chat/configs.yaml
@@ -0,0 +1,5 @@
jinachat:
  prompt_template: "jina-chat/prompt.txt"
  fn_completions: "jina_chat_completions"
  completions_kwargs: {}
  pretty_name: "JinaChat"
7 changes: 7 additions & 0 deletions src/alpaca_eval/models_configs/jina-chat/prompt.txt
@@ -0,0 +1,7 @@
<|im_start|>system
You are a helpful, respectful, and honest assistant. Always answer as helpfully and thoroughly as possible, while being safe.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<|im_end|>
<|im_start|>user
{instruction}
<|im_end|>
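
For context, once {instruction} is filled in, the decoder strips this template and converts the <|im_start|>/<|im_end|> markers into role/content messages via `_prompt_to_chatml` before sending them as the `messages` payload. A rough, hypothetical illustration of that parsed structure (the exact parsing lives in the openai decoder, not in this commit):

# Hypothetical example: {instruction} replaced with "Name three prime numbers."
messages = [
    {"role": "system", "content": "You are a helpful, respectful, and honest assistant. ..."},
    {"role": "user", "content": "Name three prime numbers."},
]
# This list is what ends up as {"messages": prompt} in the request built by _get_chat_completion.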
