Add JinaChat to the leaderboards (#117)
* feat: evaluate jina chat

Signed-off-by: jupyterjazz <saba.sturua@jina.ai>

* chore: minor change

Signed-off-by: jupyterjazz <saba.sturua@jina.ai>

* refactor: apply suggestions

Signed-off-by: jupyterjazz <saba.sturua@jina.ai>

---------

Signed-off-by: jupyterjazz <saba.sturua@jina.ai>
jupyterjazz authored Aug 9, 2023
1 parent 40282f5 commit eda4a40
Showing 7 changed files with 4,936 additions and 0 deletions.
4,832 changes: 4,832 additions & 0 deletions results/jinachat/model_outputs.json

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions src/alpaca_eval/decoders/__init__.py
@@ -67,5 +67,11 @@ def get_fn_completions(name: Union[str, Callable]) -> Callable:

        return replicate_completions

    elif name == "jina_chat_completions":
        from .jinachat import jina_chat_completions

        return jina_chat_completions

    else:
        raise ValueError(f"Unknown decoder: {name}")
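
With this branch in place, the decoder can be resolved by its registered name. A tiny usage sketch (nothing assumed beyond the names visible in the diff above):

from alpaca_eval.decoders import get_fn_completions

# Resolves the registered name to the jina_chat_completions function added in this commit.
fn_completions = get_fn_completions("jina_chat_completions")
print(fn_completions.__name__)  # jina_chat_completions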
84 changes: 84 additions & 0 deletions src/alpaca_eval/decoders/jinachat.py
@@ -0,0 +1,84 @@
import logging
import multiprocessing
from functools import partial
from typing import Sequence, Optional
import requests
import json
import os
import time
from .openai import _prompt_to_chatml
from .. import utils

__all__ = ["jina_chat_completions"]


def jina_chat_completions(
    prompts: Sequence[str],
    num_procs: Optional[int] = 4,
) -> dict[str, list]:
    """Get JinaChat completions for the given prompts.

    Parameters
    ----------
    prompts : list of str
        Prompts to get completions for.

    num_procs : int, optional
        Number of parallel processes to use for decoding.
    """
    n_examples = len(prompts)
    api_key = os.environ.get('JINA_CHAT_API_KEY')

    if n_examples == 0:
        logging.info("No samples to annotate.")
        return {}
    else:
        logging.info(f"Using `jina_chat_completions` on {n_examples} prompts.")

    prompts = [_prompt_to_chatml(prompt.strip()) for prompt in prompts]
    num_processes = min(multiprocessing.cpu_count(), num_procs)
    with utils.Timer() as t:
        with multiprocessing.Pool(processes=num_processes) as pool:
            logging.info(f"Number of processes: {pool._processes}")
            get_chat_completion_with_key = partial(_get_chat_completion, api_key)
            completions_and_num_tokens = pool.map(get_chat_completion_with_key, prompts)

    completions = [text for text, _ in completions_and_num_tokens]
    num_tokens = [tokens for _, tokens in completions_and_num_tokens]

    logging.info(f"Completed {n_examples} examples in {t}.")

    # refer to https://chat.jina.ai/billing for pricing
    price_per_example = [0.08 if msg_tokens > 300 else 0 for msg_tokens in num_tokens]
    avg_time = [t.duration / n_examples] * len(completions)

    return dict(completions=completions, price_per_example=price_per_example, time_per_example=avg_time)


def _get_chat_completion(api_key, prompt):
    url = 'https://api.chat.jina.ai/v1/chat/completions'
    headers = {
        "authorization": f"Bearer {api_key}",
        "content-type": "application/json",
    }
    json_payload = {"messages": prompt}

    max_retries = 10

    for attempt in range(max_retries):
        try:
            response = requests.post(url, headers=headers, json=json_payload)
            response.raise_for_status()  # Will raise an HTTPError if one occurred.
            message = response.json()['choices'][0]['message']['content']
            message_tokens = response.json()['usage']['completion_tokens']
            return message, message_tokens
        except (json.JSONDecodeError, requests.exceptions.HTTPError) as e:
            logging.warning(f"Error occurred: {e}, Attempt {attempt + 1} of {max_retries}")
            time.sleep(5)
            if attempt + 1 == max_retries:
                logging.exception("Max retries reached. Raising exception.")
                logging.exception(f"Request data -> URL: {url}, Headers: {headers}, JSON Payload: {json_payload}")
                raise
        except Exception as e:
            logging.exception(f"An unexpected error occurred: {e}")
            raise
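
For orientation, a minimal usage sketch of the decoder above (not part of the commit): it assumes JINA_CHAT_API_KEY is exported and that the prompt is already in the ChatML layout that `_prompt_to_chatml` expects, i.e. the same layout as the prompt template added below. The instruction text is hypothetical.

import os

from alpaca_eval.decoders.jinachat import jina_chat_completions

# Example prompt in the ChatML layout the decoder parses (hypothetical instruction).
PROMPT = (
    "<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n"
    "<|im_start|>user\nName three prime numbers.\n<|im_end|>"
)

if __name__ == "__main__":
    # The decoder reads the key itself via os.environ.get("JINA_CHAT_API_KEY").
    assert os.environ.get("JINA_CHAT_API_KEY"), "export JINA_CHAT_API_KEY before running"

    out = jina_chat_completions([PROMPT], num_procs=1)
    print(out["completions"][0])        # model answer
    print(out["price_per_example"][0])  # 0.08 if the reply used more than 300 tokens, else 0
    print(out["time_per_example"][0])   # average seconds per example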
@@ -21,6 +21,7 @@
openchat8192-13b,79.53980099502488,1.4222439886269744,639,164,1,804,community,1628
opencoderplus-15b,78.69565217391305,1.440029529188432,632,170,3,805,community,1628
vicuna-7b-v1.3,76.8414481897628,1.487520320531845,614,184,3,801,verified,1110
wizardlm-13b,75.31094527363184,1.5101858292160824,601,194,9,804,minimal,985
jinachat,74.12718204488779,1.541070307435577,592,205,5,802,community,676
airoboros-65b,73.91304347826086,1.5285333061227804,587,202,16,805,community,1512
airoboros-33b,73.29192546583852,1.55290318216736,587,212,6,805,community,1514
guanaco-65b,71.80124223602485,1.586912361158523,578,227,0,805,minimal,1249
@@ -5,6 +5,7 @@
llama-2-70b-chat-hf,72.91925465838509,1.5622590981080728,minimal,4,805,585,216,,1038
vicuna-33b-v1.3,72.36024844720497,1.5710737760483915,verified,5,805,580,220,,1479
claude-2,71.98757763975155,1.5824915958976835,minimal,1,805,579,225,1069.0,1069
chatgpt,67.70186335403726,1.642111587090117,minimal,6,805,542,257,,811
jinachat,66.64596273291924,1.65695577964565,community,5,805,534,266,,676
vicuna-13b-v1.3,66.2111801242236,1.6657907370589309,verified,2,805,532,271,,1132
wizardlm-13b,66.14906832298136,1.6584088766540706,minimal,9,805,528,268,,985
vicuna-13b,63.22981366459627,1.698243477332765,minimal,2,805,508,295,,1037
5 changes: 5 additions & 0 deletions src/alpaca_eval/models_configs/jina-chat/configs.yaml
@@ -0,0 +1,5 @@
jinachat:
  prompt_template: "jina-chat/prompt.txt"
  fn_completions: "jina_chat_completions"
  completions_kwargs: {}
  pretty_name: "JinaChat"
7 changes: 7 additions & 0 deletions src/alpaca_eval/models_configs/jina-chat/prompt.txt
@@ -0,0 +1,7 @@
<|im_start|>system
You are a helpful, respectful, and honest assistant. Always answer as helpfully and thoroughly as possible, while being safe.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<|im_end|>
<|im_start|>user
{instruction}
<|im_end|>
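
For context, once {instruction} is filled in, the decoder strips this template and converts the <|im_start|>/<|im_end|> markers into role/content messages via `_prompt_to_chatml` before sending them as the `messages` payload. A rough, hypothetical illustration of that parsed structure (the exact parsing lives in the openai decoder, not in this commit):

# Hypothetical example: {instruction} replaced with "Name three prime numbers."
messages = [
    {"role": "system", "content": "You are a helpful, respectful, and honest assistant. ..."},
    {"role": "user", "content": "Name three prime numbers."},
]
# This list is what ends up as {"messages": prompt} in the request built by _get_chat_completion.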
