From 0a49e99776601c6f2d8e961ef7668d696c67cee3 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Thu, 1 Feb 2024 19:36:39 +0100 Subject: [PATCH 01/32] switch to YAML for benchmark data .. only local case, not the encrypted data removed csv parsing to DF, instead use dict from yaml directly introduced hash for identifying exact test data configuration --- benchmark/conftest.py | 23 ++--------- benchmark/load_dataset.py | 41 ++++++++++++++++++-- benchmark/results/entity_selection.csv | 1 + benchmark/test_biocypher_query_generation.py | 2 +- 4 files changed, 43 insertions(+), 24 deletions(-) diff --git a/benchmark/conftest.py b/benchmark/conftest.py index 48300288..972de795 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -281,36 +281,21 @@ def pytest_generate_tests(metafunc): If fixture is part of test declaration, the test is parametrized. """ # Load the data file - data_file = BENCHMARK_DATASET["./data/benchmark_data.csv"] - data_file["index"] = data_file.index - - # Initialize a dictionary to collect rows for each test type - test_rows = { - "biocypher_query_generation": [], - "rag_interpretation": [], - "text_extraction": [], - } - - # Iterate over each row in the DataFrame - for index, row in data_file.iterrows(): - test_type = row["test_type"] - if test_type in test_rows: - # Add the row to the list for this test type - test_rows[test_type].append(row) + data_file = BENCHMARK_DATASET["./data/benchmark_data.yaml"] # Parametrize the fixtures with the collected rows if "test_data_biocypher_query_generation" in metafunc.fixturenames: metafunc.parametrize( "test_data_biocypher_query_generation", - test_rows["biocypher_query_generation"], + data_file["biocypher_query_generation"], ) if "test_data_rag_interpretation" in metafunc.fixturenames: metafunc.parametrize( "test_data_rag_interpretation", - test_rows["rag_interpretation"], + data_file["rag_interpretation"], ) if "test_data_text_extraction" in metafunc.fixturenames: metafunc.parametrize( "test_data_text_extraction", - test_rows["text_extraction"], + data_file["text_extraction"], ) diff --git a/benchmark/load_dataset.py b/benchmark/load_dataset.py index 6979ed4d..f6c34fa3 100644 --- a/benchmark/load_dataset.py +++ b/benchmark/load_dataset.py @@ -1,3 +1,4 @@ +import hashlib import io import json import os @@ -51,9 +52,11 @@ def _load_test_data_from_this_repository(): dict: keys are filenames and values are test data. """ print("Using public test data from this repository for benchmarking.") - test_data = {} directory_path = "./benchmark/data" files_in_directory = _get_all_files(directory_path) + + # old csv implementation + test_data_csv = {} for file_path in files_in_directory: if file_path.endswith(".csv"): df = pd.read_csv(file_path, sep=";") @@ -68,11 +71,41 @@ def _load_test_data_from_this_repository(): "system_messages", ], ) - test_data[file_path.replace("./benchmark/", "./")] = df + test_data_csv[file_path.replace("./benchmark/", "./")] = df elif file_path.endswith(".yaml"): - test_data[file_path.replace("./benchmark/", "./")] = yaml.safe_load( - file_path + test_data_csv[file_path.replace("./benchmark/", "./")] = ( + yaml.safe_load(file_path) ) + + # new yaml implementation + test_data = {} + for file_path in files_in_directory: + if file_path.endswith(".yaml"): + with open(file_path, "r") as stream: + try: + yaml_data = yaml.safe_load(stream) + + # every dictionary in the list of dictionaries that is under + # any top level key gets a hash field that is the md5 hash + # of the dictionary + + for key in yaml_data.keys(): + if isinstance(yaml_data[key], list): + for i in range(len(yaml_data[key])): + if isinstance(yaml_data[key][i], dict): + yaml_data[key][i]["hash"] = hashlib.md5( + json.dumps(yaml_data[key][i]).encode( + "utf-8" + ) + ).hexdigest() + + test_data[file_path.replace("./benchmark/", "./")] = ( + yaml_data + ) + + except yaml.YAMLError as exc: + print(exc) + return test_data diff --git a/benchmark/results/entity_selection.csv b/benchmark/results/entity_selection.csv index 63d536cf..1a5db37c 100644 --- a/benchmark/results/entity_selection.csv +++ b/benchmark/results/entity_selection.csv @@ -1,6 +1,7 @@ model_name,subtask,score,iterations gpt-3.5-turbo,0_single_word,2.0/2,2 gpt-3.5-turbo,1_multi_word,2.0/2,2 +gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,2.0/2,2 gpt-4,0_single_word,2.0/2,2 gpt-4,1_multi_word,2.0/2,2 llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/2,2 diff --git a/benchmark/test_biocypher_query_generation.py b/benchmark/test_biocypher_query_generation.py index a641855a..b3e09c27 100644 --- a/benchmark/test_biocypher_query_generation.py +++ b/benchmark/test_biocypher_query_generation.py @@ -30,7 +30,7 @@ def get_test_data(test_data_biocypher_query_generation: list) -> tuple: test_data_biocypher_query_generation["properties"], test_data_biocypher_query_generation["parts_of_query"], test_data_biocypher_query_generation["test_case_purpose"], - test_data_biocypher_query_generation["index"], + test_data_biocypher_query_generation["hash"], ) From 2f61f38ebf09604f0fb7c07587d82bac188591d8 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 02:12:19 +0100 Subject: [PATCH 02/32] some query benchmarks --- benchmark/conftest.py | 2 +- .../results/end_to_end_query_generation.csv | 52 +++++++++++++++++++ benchmark/results/entity_selection.csv | 51 ++++++++++++++++++ benchmark/results/property_exists.csv | 52 +++++++++++++++++++ benchmark/results/property_selection.csv | 52 +++++++++++++++++++ benchmark/results/query_generation.csv | 52 +++++++++++++++++++ benchmark/results/relationship_selection.csv | 52 +++++++++++++++++++ 7 files changed, 312 insertions(+), 1 deletion(-) diff --git a/benchmark/conftest.py b/benchmark/conftest.py index 972de795..eac6514a 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -28,7 +28,7 @@ "model_size_in_billions": [ 7, 13, - 70, + # 70, ], "model_format": "ggmlv3", "quantization": [ diff --git a/benchmark/results/end_to_end_query_generation.csv b/benchmark/results/end_to_end_query_generation.csv index 98188a8f..339d56e1 100644 --- a/benchmark/results/end_to_end_query_generation.csv +++ b/benchmark/results/end_to_end_query_generation.csv @@ -1,29 +1,81 @@ model_name,subtask,score,iterations gpt-3.5-turbo,0_single_word,7.0/7,2 +gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 gpt-3.5-turbo,1_multi_word,7.0/7,2 +gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 gpt-4,0_single_word,7.0/7,2 gpt-4,1_multi_word,7.0/7,2 llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/7,2 +llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q2_K,1_multi_word,0.0/7,2 +llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:13:ggmlv3:q4_0,0_single_word,0.0/7,2 +llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q4_0,1_multi_word,0.0/7,2 +llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:13:ggmlv3:q5_0,0_single_word,0.0/7,2 +llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q5_0,1_multi_word,0.0/7,2 +llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:13:ggmlv3:q8_0,0_single_word,0.0/7,2 +llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q8_0,1_multi_word,0.0/7,2 +llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:7:ggmlv3:q2_K,0_single_word,0.0/7,2 +llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:7:ggmlv3:q2_K,1_multi_word,0.0/7,2 +llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:7:ggmlv3:q4_0,0_single_word,0.0/7,2 +llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:7:ggmlv3:q4_0,1_multi_word,0.0/7,2 +llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:7:ggmlv3:q5_0,0_single_word,0.0/7,2 +llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:7:ggmlv3:q5_0,1_multi_word,0.0/7,2 +llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:7:ggmlv3:q8_0,0_single_word,0.0/7,2 +llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:7:ggmlv3:q8_0,1_multi_word,0.0/7,2 +llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,0.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,0.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,0.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,0.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,0.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,0.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,0.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,0.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 diff --git a/benchmark/results/entity_selection.csv b/benchmark/results/entity_selection.csv index 1a5db37c..55afa905 100644 --- a/benchmark/results/entity_selection.csv +++ b/benchmark/results/entity_selection.csv @@ -1,30 +1,81 @@ model_name,subtask,score,iterations gpt-3.5-turbo,0_single_word,2.0/2,2 +gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 gpt-3.5-turbo,1_multi_word,2.0/2,2 gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,2.0/2,2 gpt-4,0_single_word,2.0/2,2 gpt-4,1_multi_word,2.0/2,2 llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q2_K,1_multi_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q4_0,0_single_word,1.0/2,2 +llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q4_0,1_multi_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 +llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q5_0,0_single_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q5_0,1_multi_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q8_0,0_single_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q8_0,1_multi_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 llama-2-chat:7:ggmlv3:q2_K,0_single_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q2_K,1_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 +llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q4_0,0_single_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q4_0,1_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,2.0/2,2 +llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q5_0,0_single_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q5_0,1_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q8_0,0_single_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q8_0,1_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,0.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,0.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,0.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,0.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,0.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,1.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,0.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,1.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 diff --git a/benchmark/results/property_exists.csv b/benchmark/results/property_exists.csv index 5200f606..ee30234b 100644 --- a/benchmark/results/property_exists.csv +++ b/benchmark/results/property_exists.csv @@ -1,29 +1,81 @@ model_name,subtask,score,iterations gpt-3.5-turbo,0_single_word,2.0/2,2 +gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 gpt-3.5-turbo,1_multi_word,2.0/2,2 +gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,2.0/2,2 gpt-4,0_single_word,2.0/2,2 gpt-4,1_multi_word,2.0/2,2 llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/1,2 +llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/0,2 llama-2-chat:13:ggmlv3:q2_K,1_multi_word,0.0/1,2 +llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/0,2 +llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:13:ggmlv3:q4_0,0_single_word,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/0,2 llama-2-chat:13:ggmlv3:q4_0,1_multi_word,0.0/1,2 +llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/1,2 +llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 +llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:13:ggmlv3:q5_0,0_single_word,1.0/1,2 +llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.5/0,2 llama-2-chat:13:ggmlv3:q5_0,1_multi_word,0.5/1,2 +llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 +llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 +llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 +llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 +llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:13:ggmlv3:q8_0,0_single_word,1.0/1,2 +llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 llama-2-chat:13:ggmlv3:q8_0,1_multi_word,1.5/2,2 +llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:7:ggmlv3:q2_K,0_single_word,0.0/1,2 +llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/0,2 llama-2-chat:7:ggmlv3:q2_K,1_multi_word,1.0/1,2 +llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 +llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 +llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 llama-2-chat:7:ggmlv3:q4_0,0_single_word,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 llama-2-chat:7:ggmlv3:q4_0,1_multi_word,2.0/2,2 +llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/0,2 +llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,2.0/3,2 +llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,2.0/3,2 +llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 +llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:7:ggmlv3:q5_0,0_single_word,1.5/3,2 +llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q5_0,1_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 +llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 +llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,2.5/3,2 +llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,2.0/3,2 llama-2-chat:7:ggmlv3:q8_0,0_single_word,1.0/1,2 +llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q8_0,1_multi_word,1.0/2,2 +llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/1,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,1.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.5/0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,2.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,1.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,1.5/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,1.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,1.5/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,1.0/2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 diff --git a/benchmark/results/property_selection.csv b/benchmark/results/property_selection.csv index cd9befb2..fae31b25 100644 --- a/benchmark/results/property_selection.csv +++ b/benchmark/results/property_selection.csv @@ -1,29 +1,81 @@ model_name,subtask,score,iterations gpt-3.5-turbo,0_single_word,6.0/10,2 +gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/7,2 gpt-3.5-turbo,1_multi_word,5.0/7,2 +gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,6.0/10,2 gpt-4,0_single_word,6.0/10,2 gpt-4,1_multi_word,7.0/7,2 llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,1_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_0,0_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_0,1_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_0,0_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_0,1_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q8_0,0_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q8_0,1_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q2_K,0_single_word,2.0/10,2 +llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/7,2 llama-2-chat:7:ggmlv3:q2_K,1_multi_word,2.0/7,2 +llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,2.0/10,2 +llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_0,0_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_0,1_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q5_0,0_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q5_0,1_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q8_0,0_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q8_0,1_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 diff --git a/benchmark/results/query_generation.csv b/benchmark/results/query_generation.csv index 364fc160..a25b11a8 100644 --- a/benchmark/results/query_generation.csv +++ b/benchmark/results/query_generation.csv @@ -1,29 +1,81 @@ model_name,subtask,score,iterations gpt-3.5-turbo,0_single_word,7.0/7,2 +gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 gpt-3.5-turbo,1_multi_word,7.0/7,2 +gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 gpt-4,0_single_word,7.0/7,2 gpt-4,1_multi_word,6.5/7,2 llama-2-chat:13:ggmlv3:q2_K,0_single_word,4.0/7,2 +llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 llama-2-chat:13:ggmlv3:q2_K,1_multi_word,4.0/7,2 +llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 +llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 +llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 llama-2-chat:13:ggmlv3:q4_0,0_single_word,6.0/7,2 +llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 llama-2-chat:13:ggmlv3:q4_0,1_multi_word,4.0/7,2 +llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 +llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 +llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 +llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 +llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 +llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 +llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 llama-2-chat:13:ggmlv3:q5_0,0_single_word,7.0/7,2 +llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 llama-2-chat:13:ggmlv3:q5_0,1_multi_word,5.5/7,2 +llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 +llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 +llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 +llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 +llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:13:ggmlv3:q8_0,0_single_word,6.0/7,2 +llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 llama-2-chat:13:ggmlv3:q8_0,1_multi_word,7.0/7,2 +llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:7:ggmlv3:q2_K,0_single_word,4.5/7,2 +llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 llama-2-chat:7:ggmlv3:q2_K,1_multi_word,4.0/7,2 +llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 +llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 +llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 llama-2-chat:7:ggmlv3:q4_0,0_single_word,3.5/7,2 +llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 llama-2-chat:7:ggmlv3:q4_0,1_multi_word,5.0/7,2 +llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,4.5/8,2 +llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 +llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 +llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.5/8,2 +llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,4.0/8,2 +llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 +llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:7:ggmlv3:q5_0,0_single_word,6.0/7,2 +llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 llama-2-chat:7:ggmlv3:q5_0,1_multi_word,6.0/7,2 +llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 +llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 +llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,4.5/8,2 +llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 +llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:7:ggmlv3:q8_0,0_single_word,6.0/7,2 +llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 llama-2-chat:7:ggmlv3:q8_0,1_multi_word,6.0/7,2 +llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,5.5/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.5/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,6.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,7.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,6.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,7.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,7.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,7.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,7.0/7,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 diff --git a/benchmark/results/relationship_selection.csv b/benchmark/results/relationship_selection.csv index c14a5d88..47af2d44 100644 --- a/benchmark/results/relationship_selection.csv +++ b/benchmark/results/relationship_selection.csv @@ -1,29 +1,81 @@ model_name,subtask,score,iterations gpt-3.5-turbo,0_single_word,3.0/3,2 +gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,3.0/3,2 gpt-3.5-turbo,1_multi_word,3.0/3,2 +gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,3.0/3,2 gpt-4,0_single_word,3.0/3,2 gpt-4,1_multi_word,3.0/3,2 llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,1_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_0,0_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_0,1_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_0,0_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_0,1_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q8_0,0_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q8_0,1_multi_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q2_K,0_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q2_K,1_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,3.0/3,2 llama-2-chat:7:ggmlv3:q4_0,0_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_0,1_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q5_0,0_single_word,3.0/3,2 +llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q5_0,1_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,3.0/3,2 +llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q8_0,0_single_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q8_0,1_multi_word,0.0/3,2 +llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,1.5/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,0.0/3,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 From 5c2beddf058e453a14d53b749df3307fc68f2445 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 11:18:47 +0100 Subject: [PATCH 03/32] remove prior non hashed results --- .../results/end_to_end_query_generation.csv | 28 ------------------- benchmark/results/entity_selection.csv | 28 ------------------- benchmark/results/property_exists.csv | 28 ------------------- benchmark/results/property_selection.csv | 28 ------------------- benchmark/results/query_generation.csv | 28 ------------------- benchmark/results/relationship_selection.csv | 28 ------------------- 6 files changed, 168 deletions(-) diff --git a/benchmark/results/end_to_end_query_generation.csv b/benchmark/results/end_to_end_query_generation.csv index 339d56e1..fe0e171d 100644 --- a/benchmark/results/end_to_end_query_generation.csv +++ b/benchmark/results/end_to_end_query_generation.csv @@ -1,19 +1,11 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,0_single_word,7.0/7,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -gpt-3.5-turbo,1_multi_word,7.0/7,2 gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -gpt-4,0_single_word,7.0/7,2 -gpt-4,1_multi_word,7.0/7,2 -llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/7,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q2_K,1_multi_word,0.0/7,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q4_0,0_single_word,0.0/7,2 llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q4_0,1_multi_word,0.0/7,2 llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 @@ -21,27 +13,19 @@ llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8, llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q5_0,0_single_word,0.0/7,2 llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q5_0,1_multi_word,0.0/7,2 llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q8_0,0_single_word,0.0/7,2 llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q8_0,1_multi_word,0.0/7,2 llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q2_K,0_single_word,0.0/7,2 llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q2_K,1_multi_word,0.0/7,2 llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q4_0,0_single_word,0.0/7,2 llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q4_0,1_multi_word,0.0/7,2 llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 @@ -49,33 +33,21 @@ llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q5_0,0_single_word,0.0/7,2 llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q5_0,1_multi_word,0.0/7,2 llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q8_0,0_single_word,0.0/7,2 llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q8_0,1_multi_word,0.0/7,2 llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,0.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,0.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,0.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,0.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,0.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,0.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,0.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,0.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 diff --git a/benchmark/results/entity_selection.csv b/benchmark/results/entity_selection.csv index 55afa905..5ccda2ae 100644 --- a/benchmark/results/entity_selection.csv +++ b/benchmark/results/entity_selection.csv @@ -1,19 +1,11 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,0_single_word,2.0/2,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -gpt-3.5-turbo,1_multi_word,2.0/2,2 gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,2.0/2,2 -gpt-4,0_single_word,2.0/2,2 -gpt-4,1_multi_word,2.0/2,2 -llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q2_K,1_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q4_0,0_single_word,1.0/2,2 llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q4_0,1_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 @@ -21,27 +13,19 @@ llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2, llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q5_0,0_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q5_0,1_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q8_0,0_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q8_0,1_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:7:ggmlv3:q2_K,0_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q2_K,1_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_0,0_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_0,1_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 @@ -49,33 +33,21 @@ llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,2.0/2,2 llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q5_0,0_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q5_0,1_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q8_0,0_single_word,1.0/2,2 llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q8_0,1_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,0.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 diff --git a/benchmark/results/property_exists.csv b/benchmark/results/property_exists.csv index ee30234b..48e07cd1 100644 --- a/benchmark/results/property_exists.csv +++ b/benchmark/results/property_exists.csv @@ -1,19 +1,11 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,0_single_word,2.0/2,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -gpt-3.5-turbo,1_multi_word,2.0/2,2 gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,2.0/2,2 -gpt-4,0_single_word,2.0/2,2 -gpt-4,1_multi_word,2.0/2,2 -llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/1,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/0,2 -llama-2-chat:13:ggmlv3:q2_K,1_multi_word,0.0/1,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/0,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,0_single_word,1.0/1,2 llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/0,2 -llama-2-chat:13:ggmlv3:q4_0,1_multi_word,0.0/1,2 llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/1,2 llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 @@ -21,27 +13,19 @@ llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2, llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_0,0_single_word,1.0/1,2 llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.5/0,2 -llama-2-chat:13:ggmlv3:q5_0,1_multi_word,0.5/1,2 llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q8_0,0_single_word,1.0/1,2 llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 -llama-2-chat:13:ggmlv3:q8_0,1_multi_word,1.5/2,2 llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:7:ggmlv3:q2_K,0_single_word,0.0/1,2 llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/0,2 -llama-2-chat:7:ggmlv3:q2_K,1_multi_word,1.0/1,2 llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 -llama-2-chat:7:ggmlv3:q4_0,0_single_word,1.0/1,2 llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -llama-2-chat:7:ggmlv3:q4_0,1_multi_word,2.0/2,2 llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/0,2 llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,2.0/3,2 @@ -49,33 +33,21 @@ llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,2.0/3,2 llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,0_single_word,1.5/3,2 llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q5_0,1_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,2.5/3,2 llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,2.0/3,2 -llama-2-chat:7:ggmlv3:q8_0,0_single_word,1.0/1,2 llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q8_0,1_multi_word,1.0/2,2 llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,0.0/1,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,1.0/1,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.5/0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,2.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,1.5/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,1.5/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,1.0/2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 diff --git a/benchmark/results/property_selection.csv b/benchmark/results/property_selection.csv index fae31b25..a12e0d85 100644 --- a/benchmark/results/property_selection.csv +++ b/benchmark/results/property_selection.csv @@ -1,19 +1,11 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,0_single_word,6.0/10,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/7,2 -gpt-3.5-turbo,1_multi_word,5.0/7,2 gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,6.0/10,2 -gpt-4,0_single_word,6.0/10,2 -gpt-4,1_multi_word,7.0/7,2 -llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q2_K,1_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_0,0_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_0,1_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 @@ -21,27 +13,19 @@ llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3, llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_0,0_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_0,1_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q8_0,0_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q8_0,1_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q2_K,0_single_word,2.0/10,2 llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/7,2 -llama-2-chat:7:ggmlv3:q2_K,1_multi_word,2.0/7,2 llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,2.0/10,2 llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_0,0_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_0,1_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 @@ -49,33 +33,21 @@ llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_0,0_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_0,1_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q8_0,0_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q8_0,1_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 diff --git a/benchmark/results/query_generation.csv b/benchmark/results/query_generation.csv index a25b11a8..118d463a 100644 --- a/benchmark/results/query_generation.csv +++ b/benchmark/results/query_generation.csv @@ -1,19 +1,11 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,0_single_word,7.0/7,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -gpt-3.5-turbo,1_multi_word,7.0/7,2 gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -gpt-4,0_single_word,7.0/7,2 -gpt-4,1_multi_word,6.5/7,2 -llama-2-chat:13:ggmlv3:q2_K,0_single_word,4.0/7,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 -llama-2-chat:13:ggmlv3:q2_K,1_multi_word,4.0/7,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q4_0,0_single_word,6.0/7,2 llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 -llama-2-chat:13:ggmlv3:q4_0,1_multi_word,4.0/7,2 llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 @@ -21,27 +13,19 @@ llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8, llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 -llama-2-chat:13:ggmlv3:q5_0,0_single_word,7.0/7,2 llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:13:ggmlv3:q5_0,1_multi_word,5.5/7,2 llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:13:ggmlv3:q8_0,0_single_word,6.0/7,2 llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q8_0,1_multi_word,7.0/7,2 llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q2_K,0_single_word,4.5/7,2 llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 -llama-2-chat:7:ggmlv3:q2_K,1_multi_word,4.0/7,2 llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 -llama-2-chat:7:ggmlv3:q4_0,0_single_word,3.5/7,2 llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 -llama-2-chat:7:ggmlv3:q4_0,1_multi_word,5.0/7,2 llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,4.5/8,2 llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 @@ -49,33 +33,21 @@ llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.5/8,2 llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,4.0/8,2 llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q5_0,0_single_word,6.0/7,2 llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q5_0,1_multi_word,6.0/7,2 llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,4.5/8,2 llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q8_0,0_single_word,6.0/7,2 llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q8_0,1_multi_word,6.0/7,2 llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,5.5/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.5/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,6.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,7.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,6.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,7.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,7.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,7.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,7.0/7,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 diff --git a/benchmark/results/relationship_selection.csv b/benchmark/results/relationship_selection.csv index 47af2d44..9f340f1c 100644 --- a/benchmark/results/relationship_selection.csv +++ b/benchmark/results/relationship_selection.csv @@ -1,19 +1,11 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,0_single_word,3.0/3,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,3.0/3,2 -gpt-3.5-turbo,1_multi_word,3.0/3,2 gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,3.0/3,2 -gpt-4,0_single_word,3.0/3,2 -gpt-4,1_multi_word,3.0/3,2 -llama-2-chat:13:ggmlv3:q2_K,0_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q2_K,1_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_0,0_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_0,1_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 @@ -21,27 +13,19 @@ llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3, llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_0,0_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_0,1_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q8_0,0_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q8_0,1_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q2_K,0_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q2_K,1_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,3.0/3,2 -llama-2-chat:7:ggmlv3:q4_0,0_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_0,1_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 @@ -49,33 +33,21 @@ llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_0,0_single_word,3.0/3,2 llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_0,1_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,3.0/3,2 llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q8_0,0_single_word,0.0/3,2 llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q8_0,1_multi_word,0.0/3,2 llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1_multi_word,1.5/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0_single_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1_multi_word,0.0/3,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 From d402b1b0b4437c46a87cc73b8a69ca3c789ecb90 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 11:19:17 +0100 Subject: [PATCH 04/32] rename RAG results --- ...agments.csv => old-explicit_relevance_of_single_fragments.csv} | 0 ...ments.csv => old-implicit_relevance_of_multiple_fragments.csv} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename benchmark/results/{explicit_relevance_of_single_fragments.csv => old-explicit_relevance_of_single_fragments.csv} (100%) rename benchmark/results/{implicit_relevance_of_multiple_fragments.csv => old-implicit_relevance_of_multiple_fragments.csv} (100%) diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/old-explicit_relevance_of_single_fragments.csv similarity index 100% rename from benchmark/results/explicit_relevance_of_single_fragments.csv rename to benchmark/results/old-explicit_relevance_of_single_fragments.csv diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/old-implicit_relevance_of_multiple_fragments.csv similarity index 100% rename from benchmark/results/implicit_relevance_of_multiple_fragments.csv rename to benchmark/results/old-implicit_relevance_of_multiple_fragments.csv From adb5b0e42fe610b95c065253bcad886bc814c8d3 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 13:51:15 +0100 Subject: [PATCH 05/32] RAG benchmark: remove getting test data .. via function, just access the keys of the dictionary --- benchmark/conftest.py | 2 +- benchmark/data/benchmark_data.yaml | 8 +- ...explicit_relevance_of_single_fragments.csv | 2 + ...plicit_relevance_of_multiple_fragments.csv | 2 + benchmark/test_rag_interpretation.py | 76 +++++-------------- 5 files changed, 30 insertions(+), 60 deletions(-) create mode 100644 benchmark/results/explicit_relevance_of_single_fragments.csv create mode 100644 benchmark/results/implicit_relevance_of_multiple_fragments.csv diff --git a/benchmark/conftest.py b/benchmark/conftest.py index eac6514a..972de795 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -28,7 +28,7 @@ "model_size_in_billions": [ 7, 13, - # 70, + 70, ], "model_format": "ggmlv3", "quantization": [ diff --git a/benchmark/data/benchmark_data.yaml b/benchmark/data/benchmark_data.yaml index efd1dc91..a33d65e6 100644 --- a/benchmark/data/benchmark_data.yaml +++ b/benchmark/data/benchmark_data.yaml @@ -62,7 +62,7 @@ biocypher_query_generation: test_case_purpose: multi_word rag_interpretation: - prompt: Which molecular pathways are associated with cancer? - entities: ["no"] + answer: "no" test_case_purpose: explicit system_messages: [ @@ -70,7 +70,7 @@ rag_interpretation: "The earth is a globe.", ] - prompt: Which molecular pathways are associated with cancer? - entities: ["yes"] + answer: "yes" test_case_purpose: explicit system_messages: [ @@ -79,7 +79,7 @@ rag_interpretation: "The EGFR pathway is deregulated in a number of cancers.", ] - prompt: Which molecular pathways are associated with cancer? - entities: ["yes"] + answer: "yes" test_case_purpose: explicit system_messages: [ @@ -87,7 +87,7 @@ rag_interpretation: "The EGFR pathway is deregulated in a number of cancers.", ] - prompt: Which molecular pathways are associated with cancer? - entities: ["no"] + answer: "no" test_case_purpose: explicit system_messages: [ diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv new file mode 100644 index 00000000..04d8a779 --- /dev/null +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -0,0 +1,2 @@ +model_name,subtask,score,iterations +gpt-3.5-turbo,12ed141a7f09ef55b40aa380c28ab805_explicit,1.0/1,2 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv new file mode 100644 index 00000000..0b964432 --- /dev/null +++ b/benchmark/results/implicit_relevance_of_multiple_fragments.csv @@ -0,0 +1,2 @@ +model_name,subtask,score,iterations +gpt-3.5-turbo,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 diff --git a/benchmark/test_rag_interpretation.py b/benchmark/test_rag_interpretation.py index 4784b4ce..59c772e9 100644 --- a/benchmark/test_rag_interpretation.py +++ b/benchmark/test_rag_interpretation.py @@ -11,65 +11,33 @@ ) -def get_test_data(test_data_rag_interpretation: list) -> tuple: - """ - - Helper function to unpack the test data from the - test_data_rag_interpretation fixture. - - Args: - test_data_rag_interpretation (list): The test data from the - test_data_rag_interpretation fixture - - Returns: - tuple: The unpacked test data - """ - return ( - test_data_rag_interpretation["system_messages"], - test_data_rag_interpretation["prompt"], - test_data_rag_interpretation["entities"], - test_data_rag_interpretation["test_case_purpose"], - test_data_rag_interpretation["index"], - ) - - def test_explicit_relevance_of_single_fragments( model_name, test_data_rag_interpretation, conversation, multiple_testing, ): - ( - system_messages, - prompt, - expected_answers, - test_case_purpose, - test_case_index, - ) = get_test_data(test_data_rag_interpretation) task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(test_case_index)}_{test_case_purpose}" - if not test_case_purpose == "explicit": + subtask = f"{str(test_data_rag_interpretation['hash'])}_{test_data_rag_interpretation['test_case_purpose']}" + if not test_data_rag_interpretation["test_case_purpose"] == "explicit": pytest.skip( - f"test case {test_case_purpose} not supported for {subtask} benchmark" + f"test case {test_data_rag_interpretation['test_case_purpose']} not supported for {subtask} benchmark" ) skip_if_already_run(model_name=model_name, task=task, subtask=subtask) def run_test(): conversation.reset() # needs to be reset for each test - [conversation.append_system_message(m) for m in system_messages] - response, _, _ = conversation.query(prompt) - answers = ensure_iterable(response.split(",")) + [ + conversation.append_system_message(m) + for m in test_data_rag_interpretation["system_messages"] + ] + response, _, _ = conversation.query( + test_data_rag_interpretation["prompt"] + ) score = [] - if len(answers) == len(expected_answers): - for index, answer in enumerate(answers): - if answer == expected_answers[index]: - score.append(True) - else: - score.append(False) - else: - [score.append(False) for _ in expected_answers] + score.append(response.lower() == test_data_rag_interpretation["answer"]) return calculate_test_score(score) @@ -91,25 +59,23 @@ def test_implicit_relevance_of_multiple_fragments( evaluation_conversation, multiple_testing, ): - ( - system_messages, - prompt, - expected_answers, - test_case_purpose, - test_case_index, - ) = get_test_data(test_data_rag_interpretation) task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(test_case_index)}_{test_case_purpose}" - if not test_case_purpose == "implicit": + subtask = f"{str(test_data_rag_interpretation['hash'])}_{test_data_rag_interpretation['test_case_purpose']}" + if not test_data_rag_interpretation["test_case_purpose"] == "implicit": pytest.skip( - f"test case {test_case_purpose} not supported for {subtask} benchmark" + f"test case {test_data_rag_interpretation['test_case_purpose']} not supported for {subtask} benchmark" ) skip_if_already_run(model_name=model_name, task=task, subtask=subtask) def run_test(): conversation.reset() # needs to be reset for each test - [conversation.append_system_message(m) for m in system_messages] - response, _, _ = conversation.query(prompt) + [ + conversation.append_system_message(m) + for m in test_data_rag_interpretation["system_messages"] + ] + response, _, _ = conversation.query( + test_data_rag_interpretation["prompt"] + ) # evaluator LLM evaluation_conversation.append_system_message( From ee7193c757c63091ee6001c88d2ff7bbd007c9d4 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 14:09:29 +0100 Subject: [PATCH 06/32] remove get_test_data function, instead .. get data from dict directly (from the test data yaml) --- benchmark/test_biocypher_query_generation.py | 175 +++++-------------- 1 file changed, 47 insertions(+), 128 deletions(-) diff --git a/benchmark/test_biocypher_query_generation.py b/benchmark/test_biocypher_query_generation.py index b3e09c27..fedafd98 100644 --- a/benchmark/test_biocypher_query_generation.py +++ b/benchmark/test_biocypher_query_generation.py @@ -12,28 +12,6 @@ ) -def get_test_data(test_data_biocypher_query_generation: list) -> tuple: - """Helper function to unpack the test data from the test_data_biocypher_query_generation fixture. - - Args: - test_data_biocypher_query_generation (list): The test data from the test_data_biocypher_query_generation fixture - - Returns: - tuple: The unpacked test data - """ - return ( - test_data_biocypher_query_generation["kg_path"], - test_data_biocypher_query_generation["prompt"], - test_data_biocypher_query_generation["entities"], - test_data_biocypher_query_generation["relationships"], - test_data_biocypher_query_generation["relationship_labels"], - test_data_biocypher_query_generation["properties"], - test_data_biocypher_query_generation["parts_of_query"], - test_data_biocypher_query_generation["test_case_purpose"], - test_data_biocypher_query_generation["hash"], - ) - - def get_prompt_engine( kg_schema_file_name: str, create_prompt_engine, @@ -59,31 +37,22 @@ def test_entity_selection( conversation, multiple_testing, ): - ( - kg_schema_file_name, - prompt, - expected_entities, - _, - _, - _, - _, - test_case_purpose, - test_case_index, - ) = get_test_data(test_data_biocypher_query_generation) + yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(test_case_index)}_{test_case_purpose}" + subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(kg_schema_file_name, prompt_engine) + prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) def run_test(): conversation.reset() # needs to be reset for each test success = prompt_engine._select_entities( - question=prompt, conversation=conversation + question=yaml_data["prompt"], + conversation=conversation, ) assert success score = [] - for expected_entity in expected_entities: + for expected_entity in yaml_data["entities"]: score.append(expected_entity in prompt_engine.selected_entities) return calculate_test_score(score) @@ -105,24 +74,14 @@ def test_relationship_selection( conversation, multiple_testing, ): - ( - kg_schema_file_name, - prompt, - expected_entities, - _, - expected_relationship_labels, - _, - _, - test_case_purpose, - test_case_index, - ) = get_test_data(test_data_biocypher_query_generation) + yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(test_case_index)}_{test_case_purpose}" + subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(kg_schema_file_name, prompt_engine) + prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) - prompt_engine.question = prompt - prompt_engine.selected_entities = expected_entities + prompt_engine.question = yaml_data["prompt"] + prompt_engine.selected_entities = yaml_data["entities"] # TODO: more generic, for nested structures @@ -132,17 +91,17 @@ def run_test(): assert success score = [] - for ( - expected_relationship_label_key - ) in expected_relationship_labels.keys(): + for expected_relationship_label_key in yaml_data[ + "relationship_labels" + ].keys(): score.append( expected_relationship_label_key in prompt_engine.selected_relationship_labels.keys() ) - for ( - expected_relationship_label_value - ) in expected_relationship_labels[expected_relationship_label_key]: + for expected_relationship_label_value in yaml_data[ + "relationship_labels" + ][expected_relationship_label_key]: try: score.append( expected_relationship_label_value @@ -172,25 +131,15 @@ def test_property_selection( conversation, multiple_testing, ): - ( - kg_schema_file_name, - prompt, - expected_entities, - expected_relationships, - _, - expected_properties, - _, - test_case_purpose, - test_case_index, - ) = get_test_data(test_data_biocypher_query_generation) + yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(test_case_index)}_{test_case_purpose}" + subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(kg_schema_file_name, prompt_engine) + prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) - prompt_engine.question = prompt - prompt_engine.selected_entities = expected_entities - prompt_engine.selected_relationships = expected_relationships + prompt_engine.question = yaml_data["prompt"] + prompt_engine.selected_entities = yaml_data["entities"] + prompt_engine.selected_relationships = yaml_data["relationships"] def run_test(): conversation.reset() # needs to be reset for each test @@ -198,7 +147,7 @@ def run_test(): if success: score = [] - for expected_property_key in expected_properties.keys(): + for expected_property_key in yaml_data["properties"].keys(): try: score.append( expected_property_key @@ -207,7 +156,7 @@ def run_test(): except KeyError: score.append(False) - for expected_property_value in expected_properties[ + for expected_property_value in yaml_data["properties"][ expected_property_key ]: try: @@ -220,7 +169,7 @@ def run_test(): except KeyError: score.append(False) else: - score = [False for _ in expected_properties.keys()] + score = [False for _ in yaml_data["properties"].keys()] return calculate_test_score(score) @@ -242,35 +191,25 @@ def test_query_generation( conversation, multiple_testing, ): - ( - kg_schema_file_name, - prompt, - expected_entities, - _, - expected_relationship_labels, - expected_properties, - expected_parts_of_query, - test_case_purpose, - test_case_index, - ) = get_test_data(test_data_biocypher_query_generation) + yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(test_case_index)}_{test_case_purpose}" + subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(kg_schema_file_name, prompt_engine) + prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) def run_test(): conversation.reset() # needs to be reset for each test query = prompt_engine._generate_query( - question=prompt, - entities=expected_entities, - relationships=expected_relationship_labels, - properties=expected_properties, + question=yaml_data["prompt"], + entities=yaml_data["entities"], + relationships=yaml_data["relationship_labels"], + properties=yaml_data["properties"], query_language="Cypher", conversation=conversation, ) score = [] - for expected_part_of_query in expected_parts_of_query: + for expected_part_of_query in yaml_data["parts_of_query"]: if isinstance(expected_part_of_query, tuple): score.append( expected_part_of_query[0] in query @@ -300,31 +239,21 @@ def test_end_to_end_query_generation( conversation, multiple_testing, ): - ( - kg_schema_file_name, - prompt, - _, - _, - _, - _, - expected_parts_of_query, - test_case_purpose, - test_case_index, - ) = get_test_data(test_data_biocypher_query_generation) + yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(test_case_index)}_{test_case_purpose}" + subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(kg_schema_file_name, prompt_engine) + prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) def run_test(): conversation.reset() # needs to be reset for each test try: query = prompt_engine.generate_query( - question=prompt, + question=yaml_data["prompt"], query_language="Cypher", ) score = [] - for expected_part_of_query in expected_parts_of_query: + for expected_part_of_query in yaml_data["parts_of_query"]: if isinstance(expected_part_of_query, tuple): score.append( expected_part_of_query[0] in query @@ -335,7 +264,7 @@ def run_test(): (re.search(expected_part_of_query, query) is not None) ) except ValueError as e: - score = [False for _ in expected_parts_of_query] + score = [False for _ in yaml_data["parts_of_query"]] return calculate_test_score(score) @@ -440,29 +369,19 @@ def test_property_exists( conversation, multiple_testing, ): - ( - kg_schema_file_name, - prompt, - expected_entities, - _, - expected_relationship_labels, - expected_properties, - expected_parts_of_query, - test_case_purpose, - test_case_index, - ) = get_test_data(test_data_biocypher_query_generation) + yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(test_case_index)}_{test_case_purpose}" + subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(kg_schema_file_name, prompt_engine) + prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) def run_test(): conversation.reset() # needs to be reset for each test query = prompt_engine._generate_query( - question=prompt, - entities=expected_entities, - relationships=expected_relationship_labels, - properties=expected_properties, + question=yaml_data["prompt"], + entities=yaml_data["entities"], + relationships=yaml_data["relationship_labels"], + properties=yaml_data["properties"], query_language="Cypher", conversation=conversation, ) From 1a53685d2a01792cdf1c36c611c7096992c25eec Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 14:09:56 +0100 Subject: [PATCH 07/32] deactivate 70B --- benchmark/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/conftest.py b/benchmark/conftest.py index 972de795..eac6514a 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -28,7 +28,7 @@ "model_size_in_billions": [ 7, 13, - 70, + # 70, ], "model_format": "ggmlv3", "quantization": [ From 84a02be859a353efbefbe9be0f93b880384daf48 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 14:27:18 +0100 Subject: [PATCH 08/32] adjust RAG system prompt, postprocess single word answer --- benchmark/data/benchmark_data.yaml | 8 +++---- ...explicit_relevance_of_single_fragments.csv | 21 ++++++++++++++++++- ...plicit_relevance_of_multiple_fragments.csv | 2 -- benchmark/test_rag_interpretation.py | 5 +++++ 4 files changed, 29 insertions(+), 7 deletions(-) delete mode 100644 benchmark/results/implicit_relevance_of_multiple_fragments.csv diff --git a/benchmark/data/benchmark_data.yaml b/benchmark/data/benchmark_data.yaml index a33d65e6..b7ee0996 100644 --- a/benchmark/data/benchmark_data.yaml +++ b/benchmark/data/benchmark_data.yaml @@ -66,7 +66,7 @@ rag_interpretation: test_case_purpose: explicit system_messages: [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'. Here is the fragment: ", + "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", "The earth is a globe.", ] - prompt: Which molecular pathways are associated with cancer? @@ -74,7 +74,7 @@ rag_interpretation: test_case_purpose: explicit system_messages: [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'. Here is the fragment: ", + "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", "TP53 is important in the regulation of cellular death.", "The EGFR pathway is deregulated in a number of cancers.", ] @@ -83,7 +83,7 @@ rag_interpretation: test_case_purpose: explicit system_messages: [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'. Here is the fragment: ", + "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", "The EGFR pathway is deregulated in a number of cancers.", ] - prompt: Which molecular pathways are associated with cancer? @@ -91,7 +91,7 @@ rag_interpretation: test_case_purpose: explicit system_messages: [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'. Here is the fragment: ", + "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", "The Human is the most endurant mammal.", ] - prompt: Which molecular pathways are associated with cancer? diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv index 04d8a779..2a1ef59b 100644 --- a/benchmark/results/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -1,2 +1,21 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,12ed141a7f09ef55b40aa380c28ab805_explicit,1.0/1,2 +gpt-3.5-turbo,3208afe06efed369103692065713f060_explicit,1.0/1,2 +gpt-3.5-turbo,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +gpt-3.5-turbo,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +gpt-3.5-turbo,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q2_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q2_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q2_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q2_K,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 +llama-2-chat:13:ggmlv3:q3_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q3_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 +llama-2-chat:13:ggmlv3:q3_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q3_K_M,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 +llama-2-chat:13:ggmlv3:q4_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_1,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_1,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_1,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_1,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv deleted file mode 100644 index 0b964432..00000000 --- a/benchmark/results/implicit_relevance_of_multiple_fragments.csv +++ /dev/null @@ -1,2 +0,0 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 diff --git a/benchmark/test_rag_interpretation.py b/benchmark/test_rag_interpretation.py index 59c772e9..ce3d3401 100644 --- a/benchmark/test_rag_interpretation.py +++ b/benchmark/test_rag_interpretation.py @@ -35,6 +35,11 @@ def run_test(): test_data_rag_interpretation["prompt"] ) + # lower case, remove punctuation + response = ( + response.lower().replace(".", "").replace("?", "").replace("!", "") + ).strip() + score = [] score.append(response.lower() == test_data_rag_interpretation["answer"]) From bb360eae1c2c119f3cbde572951eba517575b08b Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 15:08:19 +0100 Subject: [PATCH 09/32] change test order to run models sequentially --- benchmark/conftest.py | 13 +++ ...explicit_relevance_of_single_fragments.csv | 84 +++++++++++++++++++ ...plicit_relevance_of_multiple_fragments.csv | 27 ++++++ 3 files changed, 124 insertions(+) create mode 100644 benchmark/results/implicit_relevance_of_multiple_fragments.csv diff --git a/benchmark/conftest.py b/benchmark/conftest.py index eac6514a..26ce3236 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -83,6 +83,19 @@ BENCHMARK_URL = "http://localhost:9997" +def pytest_collection_modifyitems(items): + """ + Pytest hook function to modify the collected test items. + Called once after collection has been performed. + + Used here to order items by their `callspec.id` (which starts with the + model name and configuration) to ensure running all tests for one model + before moving to the next model. + """ + + items.sort(key=lambda item: item.callspec.id) + + # parameterise tests to run for each model @pytest.fixture(params=BENCHMARKED_MODELS) def model_name(request): diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv index 2a1ef59b..0a60ebe5 100644 --- a/benchmark/results/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -19,3 +19,87 @@ llama-2-chat:13:ggmlv3:q4_1,3208afe06efed369103692065713f060_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q4_1,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q4_1,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q4_1,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_M,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_S,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_S,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_S,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_S,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q5_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q5_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q5_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q5_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q5_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q5_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q5_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q5_K_M,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q6_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q6_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q6_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q6_K,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q8_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q8_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q8_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q8_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q2_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q2_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q2_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q2_K,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q3_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q3_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q3_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q3_K_M,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_1,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_1,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_1,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_1,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_K_M,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_K_S,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_K_S,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_K_S,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q4_K_S,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q5_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q5_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q5_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q5_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q5_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q5_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q5_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q5_K_M,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q6_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q6_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q6_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q6_K,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q8_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q8_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q8_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +llama-2-chat:7:ggmlv3:q8_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,3208afe06efed369103692065713f060_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,549b1353372632a891705bb0e621e091_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,3208afe06efed369103692065713f060_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,549b1353372632a891705bb0e621e091_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,549b1353372632a891705bb0e621e091_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv new file mode 100644 index 00000000..9b35e3ab --- /dev/null +++ b/benchmark/results/implicit_relevance_of_multiple_fragments.csv @@ -0,0 +1,27 @@ +model_name,subtask,score,iterations +gpt-3.5-turbo,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q2_K,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 +llama-2-chat:13:ggmlv3:q3_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 +llama-2-chat:13:ggmlv3:q4_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:13:ggmlv3:q4_1,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:13:ggmlv3:q4_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 +llama-2-chat:13:ggmlv3:q4_K_S,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:13:ggmlv3:q5_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:13:ggmlv3:q5_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:13:ggmlv3:q6_K,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:13:ggmlv3:q8_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:7:ggmlv3:q2_K,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 +llama-2-chat:7:ggmlv3:q3_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:7:ggmlv3:q4_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 +llama-2-chat:7:ggmlv3:q4_1,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:7:ggmlv3:q4_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:7:ggmlv3:q4_K_S,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 +llama-2-chat:7:ggmlv3:q5_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:7:ggmlv3:q5_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:7:ggmlv3:q6_K,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +llama-2-chat:7:ggmlv3:q8_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 From 2da5ddcaa9f2db5d94907dbf1fb51aabb6632fbc Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 16:18:49 +0100 Subject: [PATCH 10/32] conditional for sorting --- benchmark/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/benchmark/conftest.py b/benchmark/conftest.py index 26ce3236..39c60bc5 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -93,7 +93,9 @@ def pytest_collection_modifyitems(items): before moving to the next model. """ - items.sort(key=lambda item: item.callspec.id) + items.sort( + key=lambda item: (item.callspec.id if hasattr(item, "callspec") else "") + ) # parameterise tests to run for each model From ec8bbc83711cd9389cb3bd7cb1ef4ddf89e4f7be Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 16:19:06 +0100 Subject: [PATCH 11/32] add positive test case --- benchmark/data/benchmark_data.yaml | 12 ++++++++++++ benchmark/test_rag_interpretation.py | 9 +++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/benchmark/data/benchmark_data.yaml b/benchmark/data/benchmark_data.yaml index b7ee0996..18df81e7 100644 --- a/benchmark/data/benchmark_data.yaml +++ b/benchmark/data/benchmark_data.yaml @@ -96,6 +96,7 @@ rag_interpretation: ] - prompt: Which molecular pathways are associated with cancer? test_case_purpose: implicit + answer: "no" system_messages: [ "You will receive a number of text fragments to help answer the user's question. Your task is to use the information in these text fragments for answering the user's question, if they are relevant. Here are the fragments: ", @@ -104,3 +105,14 @@ rag_interpretation: "America was discovered by Columbus.", "The wavelength of red light is longer than that of blue light.", ] + - prompt: Which molecular pathways are associated with cancer? + test_case_purpose: implicit + answer: "yes" + system_messages: + [ + "You will receive a number of text fragments to help answer the user's question. Your task is to use the information in these text fragments for answering the user's question, if they are relevant. Here are the fragments: ", + "TP53, when mutated, often is a driving mutation in cancer.", + "BRCA1 is an important genetic regulator of breast cancer.", + "The STAT inflammatory pathway is often perturbed in cancers.", + "The EGFR pathway can lead to excessive growth of cancer cells.", + ] diff --git a/benchmark/test_rag_interpretation.py b/benchmark/test_rag_interpretation.py index ce3d3401..09afb4e5 100644 --- a/benchmark/test_rag_interpretation.py +++ b/benchmark/test_rag_interpretation.py @@ -66,6 +66,8 @@ def test_implicit_relevance_of_multiple_fragments( ): task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" subtask = f"{str(test_data_rag_interpretation['hash'])}_{test_data_rag_interpretation['test_case_purpose']}" + correct = test_data_rag_interpretation["answer"] + incorrect = "no" if correct == "yes" else "yes" if not test_data_rag_interpretation["test_case_purpose"] == "implicit": pytest.skip( f"test case {test_data_rag_interpretation['test_case_purpose']} not supported for {subtask} benchmark" @@ -84,8 +86,11 @@ def run_test(): # evaluator LLM evaluation_conversation.append_system_message( - "Evaluate the following response regarding whether it acknowledges the irrelevance of provided information to the question. " - "Answer 'yes' if the response acknowledges the irrelevance of provided information to the question, 'no' if the response attempts to answer the question. " + "Evaluate the following response regarding whether it acknowledges " + "the irrelevance of provided information to the question. " + f"Answer '{incorrect}' if the response acknowledges the " + f"irrelevance of provided information to the question, '{correct}' " + "if the response attempts to answer the question. " ) eval, _, _ = evaluation_conversation.query(response) From f16af9525dcb547a0648ba4e1512673c0a7f101c Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 17:04:59 +0100 Subject: [PATCH 12/32] add explicit test of implicit evaluator --- benchmark/data/benchmark_data.yaml | 26 ++++++++++++-- ...explicit_relevance_of_single_fragments.csv | 2 ++ ...plicit_relevance_of_multiple_fragments.csv | 3 +- benchmark/test_rag_interpretation.py | 35 ++++++++++++------- 4 files changed, 50 insertions(+), 16 deletions(-) diff --git a/benchmark/data/benchmark_data.yaml b/benchmark/data/benchmark_data.yaml index 18df81e7..7f67de19 100644 --- a/benchmark/data/benchmark_data.yaml +++ b/benchmark/data/benchmark_data.yaml @@ -94,9 +94,31 @@ rag_interpretation: "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", "The Human is the most endurant mammal.", ] + - prompt: "I'm sorry, but the given text fragments do not provide any relevant information about molecular pathways associated with cancer." + answer: "decline" + test_case_purpose: explicit + system_messages: + [ + "You will receive a statement as an answer to this question: ", + "Which molecular pathways are associated with cancer? ", + "If the statement is an answer to the question, please type 'answer'. ", + "If the statement declines to answer to the question or apologises, giving the reason of lack of relevance of the given text fragments, please type 'decline'. ", + "Do not type anything except these two options. Here is the statement: ", + ] + - prompt: "There are several molecular pathways that are associated with cancer, for instance TP53, BRCA1, the STAT inflammatory pathway, and the EGFR pathway." + answer: "answer" + test_case_purpose: explicit + system_messages: + [ + "You will receive a statement as an answer to this question: ", + "Which molecular pathways are associated with cancer? ", + "If the statement is an answer to the question, please type 'answer'. ", + "If the statement declines to answer to the question or apologises, giving the reason of lack of relevance of the given text fragments, please type 'decline'. ", + "Do not type anything except these two options. Here is the statement: ", + ] - prompt: Which molecular pathways are associated with cancer? test_case_purpose: implicit - answer: "no" + expected_behaviour: "decline" system_messages: [ "You will receive a number of text fragments to help answer the user's question. Your task is to use the information in these text fragments for answering the user's question, if they are relevant. Here are the fragments: ", @@ -107,7 +129,7 @@ rag_interpretation: ] - prompt: Which molecular pathways are associated with cancer? test_case_purpose: implicit - answer: "yes" + expected_behaviour: "answer" system_messages: [ "You will receive a number of text fragments to help answer the user's question. Your task is to use the information in these text fragments for answering the user's question, if they are relevant. Here are the fragments: ", diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv index 0a60ebe5..ecb7bf36 100644 --- a/benchmark/results/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -1,7 +1,9 @@ model_name,subtask,score,iterations +gpt-3.5-turbo,22b46629ca39d7d139399c27f910adb1_explicit,1.0/1,2 gpt-3.5-turbo,3208afe06efed369103692065713f060_explicit,1.0/1,2 gpt-3.5-turbo,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 gpt-3.5-turbo,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 +gpt-3.5-turbo,6add138ece86dd32c7add8e60aa13054_explicit,1.0/1,2 gpt-3.5-turbo,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv index 9b35e3ab..074ff5d9 100644 --- a/benchmark/results/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/implicit_relevance_of_multiple_fragments.csv @@ -1,5 +1,6 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 +gpt-3.5-turbo,29b6ccf9c856adfedc3491199d94be12_implicit,1.0/1,2 +gpt-3.5-turbo,fe4d1ef2af4d5fa4f4c79c7e1ad50c02_implicit,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 llama-2-chat:13:ggmlv3:q3_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 llama-2-chat:13:ggmlv3:q4_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 diff --git a/benchmark/test_rag_interpretation.py b/benchmark/test_rag_interpretation.py index 09afb4e5..ccf93ac5 100644 --- a/benchmark/test_rag_interpretation.py +++ b/benchmark/test_rag_interpretation.py @@ -19,7 +19,7 @@ def test_explicit_relevance_of_single_fragments( ): task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" subtask = f"{str(test_data_rag_interpretation['hash'])}_{test_data_rag_interpretation['test_case_purpose']}" - if not test_data_rag_interpretation["test_case_purpose"] == "explicit": + if "explicit" not in test_data_rag_interpretation["test_case_purpose"]: pytest.skip( f"test case {test_data_rag_interpretation['test_case_purpose']} not supported for {subtask} benchmark" ) @@ -42,7 +42,7 @@ def run_test(): score = [] - score.append(response.lower() == test_data_rag_interpretation["answer"]) + score.append(response == test_data_rag_interpretation["answer"]) return calculate_test_score(score) @@ -66,9 +66,7 @@ def test_implicit_relevance_of_multiple_fragments( ): task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" subtask = f"{str(test_data_rag_interpretation['hash'])}_{test_data_rag_interpretation['test_case_purpose']}" - correct = test_data_rag_interpretation["answer"] - incorrect = "no" if correct == "yes" else "yes" - if not test_data_rag_interpretation["test_case_purpose"] == "implicit": + if "implicit" not in test_data_rag_interpretation["test_case_purpose"]: pytest.skip( f"test case {test_data_rag_interpretation['test_case_purpose']} not supported for {subtask} benchmark" ) @@ -84,18 +82,29 @@ def run_test(): test_data_rag_interpretation["prompt"] ) - # evaluator LLM - evaluation_conversation.append_system_message( - "Evaluate the following response regarding whether it acknowledges " - "the irrelevance of provided information to the question. " - f"Answer '{incorrect}' if the response acknowledges the " - f"irrelevance of provided information to the question, '{correct}' " - "if the response attempts to answer the question. " + msg = ( + "You will receive a statement as an answer to this question: " + f"{test_data_rag_interpretation['prompt']} " + "If the statement is an answer to the question, please type 'answer'. " + "If the statement declines to answer to the question or apologises, giving the reason of lack of relevance of the given text fragments, please type 'decline'. " + "Do not type anything except these two options. Here is the statement: " ) + # evaluator LLM + evaluation_conversation.append_system_message(msg) + eval, _, _ = evaluation_conversation.query(response) - score = [True] if eval.lower() == "yes" else [False] + # lower case, remove punctuation + eval = ( + eval.lower().replace(".", "").replace("?", "").replace("!", "") + ).strip() + + score = ( + [True] + if eval == test_data_rag_interpretation["expected_behaviour"] + else [False] + ) return calculate_test_score(score) From b0f2588dac6a34760b22d0c3d25c91969f734489 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 17:16:07 +0100 Subject: [PATCH 13/32] docstrings --- benchmark/benchmark_utils.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/benchmark/benchmark_utils.py b/benchmark/benchmark_utils.py index 92baca54..f8dd1514 100644 --- a/benchmark/benchmark_utils.py +++ b/benchmark/benchmark_utils.py @@ -9,14 +9,19 @@ def benchmark_already_executed( subtask: str, ) -> bool: """ - Checks if the benchmark task and subtask test case for the model_name have already - been executed. + + Checks if the benchmark task and subtask test case for the model_name have + already been executed. Args: - task (str): The benchmark task, e.g. "biocypher_query_generation" - subtask (str): The benchmark subtask test case, e.g. "0_entities" model_name (str): The model name, e.g. "gpt-3.5-turbo" + task (str): The benchmark task, e.g. "biocypher_query_generation" + + subtask (str): The benchmark subtask test case, e.g., + "72434e7a340a3f6dd047b944988491b7_single_word". It is composed of + the md5 hash of the test case and the test case purpose. + Returns: bool: True if the benchmark task and subtask for the model_name has @@ -38,9 +43,12 @@ def skip_if_already_run( Args: model_name (str): The model name, e.g. "gpt-3.5-turbo" - result_files (dict[str, pd.DataFrame]): The result files + task (str): The benchmark task, e.g. "biocypher_query_generation" - subtask (str): The benchmark subtask test case, e.g. "0_single_word" + + subtask (str): The benchmark subtask test case, e.g., + "72434e7a340a3f6dd047b944988491b7_single_word". It is composed of + the md5 hash of the test case and the test case purpose. """ if benchmark_already_executed(model_name, task, subtask): pytest.skip( From ef27867c369d6e893ee37d48c31c0ba85a1f1258 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 17:16:19 +0100 Subject: [PATCH 14/32] more explicit test case purpose --- benchmark/data/benchmark_data.yaml | 16 ++++++++-------- .../explicit_relevance_of_single_fragments.csv | 12 ++++++------ .../implicit_relevance_of_multiple_fragments.csv | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/benchmark/data/benchmark_data.yaml b/benchmark/data/benchmark_data.yaml index 7f67de19..998240b0 100644 --- a/benchmark/data/benchmark_data.yaml +++ b/benchmark/data/benchmark_data.yaml @@ -63,7 +63,7 @@ biocypher_query_generation: rag_interpretation: - prompt: Which molecular pathways are associated with cancer? answer: "no" - test_case_purpose: explicit + test_case_purpose: explicit_relevance_no system_messages: [ "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", @@ -71,7 +71,7 @@ rag_interpretation: ] - prompt: Which molecular pathways are associated with cancer? answer: "yes" - test_case_purpose: explicit + test_case_purpose: explicit_relevance_yes system_messages: [ "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", @@ -80,7 +80,7 @@ rag_interpretation: ] - prompt: Which molecular pathways are associated with cancer? answer: "yes" - test_case_purpose: explicit + test_case_purpose: explicit_relevance_yes system_messages: [ "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", @@ -88,7 +88,7 @@ rag_interpretation: ] - prompt: Which molecular pathways are associated with cancer? answer: "no" - test_case_purpose: explicit + test_case_purpose: explicit_relevance_no system_messages: [ "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", @@ -96,7 +96,7 @@ rag_interpretation: ] - prompt: "I'm sorry, but the given text fragments do not provide any relevant information about molecular pathways associated with cancer." answer: "decline" - test_case_purpose: explicit + test_case_purpose: explicit_evaluation_no system_messages: [ "You will receive a statement as an answer to this question: ", @@ -107,7 +107,7 @@ rag_interpretation: ] - prompt: "There are several molecular pathways that are associated with cancer, for instance TP53, BRCA1, the STAT inflammatory pathway, and the EGFR pathway." answer: "answer" - test_case_purpose: explicit + test_case_purpose: explicit_evaluation_yes system_messages: [ "You will receive a statement as an answer to this question: ", @@ -117,7 +117,7 @@ rag_interpretation: "Do not type anything except these two options. Here is the statement: ", ] - prompt: Which molecular pathways are associated with cancer? - test_case_purpose: implicit + test_case_purpose: implicit_relevance_no expected_behaviour: "decline" system_messages: [ @@ -128,7 +128,7 @@ rag_interpretation: "The wavelength of red light is longer than that of blue light.", ] - prompt: Which molecular pathways are associated with cancer? - test_case_purpose: implicit + test_case_purpose: implicit_relevance_yes expected_behaviour: "answer" system_messages: [ diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv index ecb7bf36..f80c240f 100644 --- a/benchmark/results/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -1,10 +1,10 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,22b46629ca39d7d139399c27f910adb1_explicit,1.0/1,2 -gpt-3.5-turbo,3208afe06efed369103692065713f060_explicit,1.0/1,2 -gpt-3.5-turbo,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -gpt-3.5-turbo,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -gpt-3.5-turbo,6add138ece86dd32c7add8e60aa13054_explicit,1.0/1,2 -gpt-3.5-turbo,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 +gpt-3.5-turbo,05ebbde9381a73f7fd240396d980fa7e_explicit_evaluation_yes,1.0/1,2 +gpt-3.5-turbo,14f39e6b45e89b2a94304b501c68e677_explicit_relevance_no,1.0/1,2 +gpt-3.5-turbo,2a0e00b42ab6392f2c4513c132bdbd37_explicit_relevance_yes,1.0/1,2 +gpt-3.5-turbo,3c6eb3b9fbbff548f8bb21b5f111b59c_explicit_evaluation_no,1.0/1,2 +gpt-3.5-turbo,a0b8c59680fc4c100e9a64f7a0951900_explicit_relevance_no,1.0/1,2 +gpt-3.5-turbo,dee154d0b9e963bc484c87c885d7c7bb_explicit_relevance_yes,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv index 074ff5d9..7ca10490 100644 --- a/benchmark/results/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/implicit_relevance_of_multiple_fragments.csv @@ -1,6 +1,6 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,29b6ccf9c856adfedc3491199d94be12_implicit,1.0/1,2 -gpt-3.5-turbo,fe4d1ef2af4d5fa4f4c79c7e1ad50c02_implicit,1.0/1,2 +gpt-3.5-turbo,53fa854b6b03471b357f338c3883ef82_implicit_relevance_yes,1.0/1,2 +gpt-3.5-turbo,e57da77bdf7275c183b12bfb62c2002a_implicit_relevance_no,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 llama-2-chat:13:ggmlv3:q3_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 llama-2-chat:13:ggmlv3:q4_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 From 9a5800566e8f662271e723eac7c124d2ea8c0fe6 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 17:18:00 +0100 Subject: [PATCH 15/32] remove old results --- ...explicit_relevance_of_single_fragments.csv | 57 ------------------- ...plicit_relevance_of_multiple_fragments.csv | 15 ----- 2 files changed, 72 deletions(-) delete mode 100644 benchmark/results/old-explicit_relevance_of_single_fragments.csv delete mode 100644 benchmark/results/old-implicit_relevance_of_multiple_fragments.csv diff --git a/benchmark/results/old-explicit_relevance_of_single_fragments.csv b/benchmark/results/old-explicit_relevance_of_single_fragments.csv deleted file mode 100644 index 15057a12..00000000 --- a/benchmark/results/old-explicit_relevance_of_single_fragments.csv +++ /dev/null @@ -1,57 +0,0 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,2_explicit,1.0/1,2 -gpt-3.5-turbo,3_explicit,0.0/1,2 -gpt-3.5-turbo,4_explicit,1.0/1,2 -gpt-3.5-turbo,5_explicit,1.0/1,2 -gpt-4,2_explicit,0.0/1,2 -gpt-4,3_explicit,0.0/1,2 -gpt-4,4_explicit,0.0/1,2 -gpt-4,5_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q2_K,2_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q2_K,3_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q2_K,4_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q2_K,5_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,2_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,3_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,4_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,5_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q5_0,2_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q5_0,3_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q5_0,4_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q5_0,5_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q8_0,2_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q8_0,3_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q8_0,4_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q8_0,5_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q2_K,2_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q2_K,3_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q2_K,4_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q2_K,5_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q4_0,2_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q4_0,3_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q4_0,4_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q4_0,5_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,2_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,3_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,4_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,5_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q8_0,2_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q8_0,3_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q8_0,4_explicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q8_0,5_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,2_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,3_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,4_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,5_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,3_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,4_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,5_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,3_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,4_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,5_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,3_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,4_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,5_explicit,0.0/1,2 diff --git a/benchmark/results/old-implicit_relevance_of_multiple_fragments.csv b/benchmark/results/old-implicit_relevance_of_multiple_fragments.csv deleted file mode 100644 index fd2349dc..00000000 --- a/benchmark/results/old-implicit_relevance_of_multiple_fragments.csv +++ /dev/null @@ -1,15 +0,0 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,6_implicit,1.0/1,2 -gpt-4,6_implicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q2_K,6_implicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,6_implicit,0.5/1,2 -llama-2-chat:13:ggmlv3:q5_0,6_implicit,0.5/1,2 -llama-2-chat:13:ggmlv3:q8_0,6_implicit,0.5/1,2 -llama-2-chat:7:ggmlv3:q2_K,6_implicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q4_0,6_implicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,6_implicit,0.5/1,2 -llama-2-chat:7:ggmlv3:q8_0,6_implicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,6_implicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,6_implicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,6_implicit,0.5/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,6_implicit,0.5/1,2 From 47a0a7bb5ab17eb5d3d0968883079eb9ba68e6e0 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 17:43:29 +0100 Subject: [PATCH 16/32] benchmark data structure: input and expected --- benchmark/data/benchmark_data.yaml | 266 ++++++++++-------- .../results/end_to_end_query_generation.csv | 3 +- benchmark/results/entity_selection.csv | 4 +- ...explicit_relevance_of_single_fragments.csv | 12 +- ...plicit_relevance_of_multiple_fragments.csv | 4 +- benchmark/results/property_exists.csv | 3 +- benchmark/results/property_selection.csv | 3 +- benchmark/results/query_generation.csv | 3 +- benchmark/results/relationship_selection.csv | 4 +- benchmark/test_biocypher_query_generation.py | 82 +++--- benchmark/test_rag_interpretation.py | 16 +- 11 files changed, 224 insertions(+), 176 deletions(-) diff --git a/benchmark/data/benchmark_data.yaml b/benchmark/data/benchmark_data.yaml index 998240b0..690e2bf6 100644 --- a/benchmark/data/benchmark_data.yaml +++ b/benchmark/data/benchmark_data.yaml @@ -4,137 +4,157 @@ # expected: entities etc. # assertion method? (string to evaluate, e.g. "assert 'Gene' in entities") biocypher_query_generation: - - kg_path: test_schema_info.yaml - prompt: Which genes are associated with mucoviscidosis? - entities: ["Gene", "Disease"] - relationships: ["GeneToPhenotypeAssociation"] - relationship_labels: - PERTURBED: - source: Disease - target: ["Protein", "Gene"] - properties: - Disease: - name: {} - ICD10: {} - DSM5: {} - Gene: - name: true - GeneToPhenotypeAssociation: - score: true - source: true - evidence: true - parts_of_query: - [ - "MATCH", - "RETURN", - "Gene", - "Disease", - "mucoviscidosis", - "MATCH ([a-zA-Z]*:Gene)<-[[a-zA-Z]*:PERTURBED]-([a-zA-Z]*:Disease.*)|MATCH ([a-zA-Z]*:Disease.*)-[[a-zA-Z]*:PERTURBED]->([a-zA-Z]*:Gene)", - "WHERE", - "{name:}", - ] + - input: + kg_path: test_schema_info.yaml + prompt: Which genes are associated with mucoviscidosis? + expected: + entities: ["Gene", "Disease"] + relationships: ["GeneToPhenotypeAssociation"] + relationship_labels: + PERTURBED: + source: Disease + target: ["Protein", "Gene"] + properties: + Disease: + name: {} + ICD10: {} + DSM5: {} + Gene: + name: true + GeneToPhenotypeAssociation: + score: true + source: true + evidence: true + parts_of_query: + [ + "MATCH", + "RETURN", + "Gene", + "Disease", + "mucoviscidosis", + "MATCH ([a-zA-Z]*:Gene)<-[[a-zA-Z]*:PERTURBED]-([a-zA-Z]*:Disease.*)|MATCH ([a-zA-Z]*:Disease.*)-[[a-zA-Z]*:PERTURBED]->([a-zA-Z]*:Gene)", + "WHERE", + "{name:}", + ] test_case_purpose: single_word - - kg_path: test_schema_info.yaml - prompt: Which genes are expressed in fibroblasts? - entities: ["Gene", "CellType"] - relationships: ["GeneExpressedInCellType"] - relationship_labels: - GENE_EXPRESSED_IN_CELL_TYPE: - source: Gene - target: ["CellType"] - properties: - CellType: - cell_type_name: {} - Gene: ["id", "name"] - GeneExpressedInCellType: ["expression_level"] - parts_of_query: - [ - "MATCH", - "RETURN", - "Gene", - "CellType", - "fibroblast", - "MATCH ([a-zA-Z]*:Gene)-[[a-zA-Z]*:GENE_EXPRESSED_IN_CELL_TYPE]->([a-zA-Z]*:CellType.*)|MATCH ([a-zA-Z]*:CellType.*)<-[[a-zA-Z]*:GENE_EXPRESSED_IN_CELL_TYPE]-([a-zA-Z]*:Gene)", - "WHERE", - "{name:}", - ] + - input: + kg_path: test_schema_info.yaml + prompt: Which genes are expressed in fibroblasts? + expected: + entities: ["Gene", "CellType"] + relationships: ["GeneExpressedInCellType"] + relationship_labels: + GENE_EXPRESSED_IN_CELL_TYPE: + source: Gene + target: ["CellType"] + properties: + CellType: + cell_type_name: {} + Gene: ["id", "name"] + GeneExpressedInCellType: ["expression_level"] + parts_of_query: + [ + "MATCH", + "RETURN", + "Gene", + "CellType", + "fibroblast", + "MATCH ([a-zA-Z]*:Gene)-[[a-zA-Z]*:GENE_EXPRESSED_IN_CELL_TYPE]->([a-zA-Z]*:CellType.*)|MATCH ([a-zA-Z]*:CellType.*)<-[[a-zA-Z]*:GENE_EXPRESSED_IN_CELL_TYPE]-([a-zA-Z]*:Gene)", + "WHERE", + "{name:}", + ] test_case_purpose: multi_word rag_interpretation: - - prompt: Which molecular pathways are associated with cancer? - answer: "no" + - input: + prompt: Which molecular pathways are associated with cancer? + system_messages: + [ + "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", + "The earth is a globe.", + ] + expected: + answer: "no" test_case_purpose: explicit_relevance_no - system_messages: - [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", - "The earth is a globe.", - ] - - prompt: Which molecular pathways are associated with cancer? - answer: "yes" + - input: + prompt: Which molecular pathways are associated with cancer? + system_messages: + [ + "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", + "TP53 is important in the regulation of cellular death.", + "The EGFR pathway is deregulated in a number of cancers.", + ] + expected: + answer: "yes" test_case_purpose: explicit_relevance_yes - system_messages: - [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", - "TP53 is important in the regulation of cellular death.", - "The EGFR pathway is deregulated in a number of cancers.", - ] - - prompt: Which molecular pathways are associated with cancer? - answer: "yes" + - input: + prompt: Which molecular pathways are associated with cancer? + system_messages: + [ + "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", + "The EGFR pathway is deregulated in a number of cancers.", + ] + expected: + answer: "yes" test_case_purpose: explicit_relevance_yes - system_messages: - [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", - "The EGFR pathway is deregulated in a number of cancers.", - ] - - prompt: Which molecular pathways are associated with cancer? - answer: "no" + - input: + prompt: Which molecular pathways are associated with cancer? + system_messages: + [ + "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", + "The Human is the most endurant mammal.", + ] + expected: + answer: "no" test_case_purpose: explicit_relevance_no - system_messages: - [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", - "The Human is the most endurant mammal.", - ] - - prompt: "I'm sorry, but the given text fragments do not provide any relevant information about molecular pathways associated with cancer." - answer: "decline" + - input: + prompt: "I'm sorry, but the given text fragments do not provide any relevant information about molecular pathways associated with cancer." + system_messages: + [ + "You will receive a statement as an answer to this question: ", + "Which molecular pathways are associated with cancer? ", + "If the statement is an answer to the question, please type 'answer'. ", + "If the statement declines to answer to the question or apologises, giving the reason of lack of relevance of the given text fragments, please type 'decline'. ", + "Do not type anything except these two options. Here is the statement: ", + ] + expected: + answer: "decline" test_case_purpose: explicit_evaluation_no - system_messages: - [ - "You will receive a statement as an answer to this question: ", - "Which molecular pathways are associated with cancer? ", - "If the statement is an answer to the question, please type 'answer'. ", - "If the statement declines to answer to the question or apologises, giving the reason of lack of relevance of the given text fragments, please type 'decline'. ", - "Do not type anything except these two options. Here is the statement: ", - ] - - prompt: "There are several molecular pathways that are associated with cancer, for instance TP53, BRCA1, the STAT inflammatory pathway, and the EGFR pathway." - answer: "answer" + - input: + prompt: "There are several molecular pathways that are associated with cancer, for instance TP53, BRCA1, the STAT inflammatory pathway, and the EGFR pathway." + system_messages: + [ + "You will receive a statement as an answer to this question: ", + "Which molecular pathways are associated with cancer? ", + "If the statement is an answer to the question, please type 'answer'. ", + "If the statement declines to answer to the question or apologises, giving the reason of lack of relevance of the given text fragments, please type 'decline'. ", + "Do not type anything except these two options. Here is the statement: ", + ] + expected: + answer: "answer" test_case_purpose: explicit_evaluation_yes - system_messages: - [ - "You will receive a statement as an answer to this question: ", - "Which molecular pathways are associated with cancer? ", - "If the statement is an answer to the question, please type 'answer'. ", - "If the statement declines to answer to the question or apologises, giving the reason of lack of relevance of the given text fragments, please type 'decline'. ", - "Do not type anything except these two options. Here is the statement: ", - ] - - prompt: Which molecular pathways are associated with cancer? + - input: + prompt: Which molecular pathways are associated with cancer? + system_messages: + [ + "You will receive a number of text fragments to help answer the user's question. Your task is to use the information in these text fragments for answering the user's question, if they are relevant. Here are the fragments: ", + "The earth is a globe.", + "The Human is the most endurant mammal.", + "America was discovered by Columbus.", + "The wavelength of red light is longer than that of blue light.", + ] + expected: + behaviour: "decline" test_case_purpose: implicit_relevance_no - expected_behaviour: "decline" - system_messages: - [ - "You will receive a number of text fragments to help answer the user's question. Your task is to use the information in these text fragments for answering the user's question, if they are relevant. Here are the fragments: ", - "The earth is a globe.", - "The Human is the most endurant mammal.", - "America was discovered by Columbus.", - "The wavelength of red light is longer than that of blue light.", - ] - - prompt: Which molecular pathways are associated with cancer? + - input: + prompt: Which molecular pathways are associated with cancer? + system_messages: + [ + "You will receive a number of text fragments to help answer the user's question. Your task is to use the information in these text fragments for answering the user's question, if they are relevant. Here are the fragments: ", + "TP53, when mutated, often is a driving mutation in cancer.", + "BRCA1 is an important genetic regulator of breast cancer.", + "The STAT inflammatory pathway is often perturbed in cancers.", + "The EGFR pathway can lead to excessive growth of cancer cells.", + ] + expected: + behaviour: "answer" test_case_purpose: implicit_relevance_yes - expected_behaviour: "answer" - system_messages: - [ - "You will receive a number of text fragments to help answer the user's question. Your task is to use the information in these text fragments for answering the user's question, if they are relevant. Here are the fragments: ", - "TP53, when mutated, often is a driving mutation in cancer.", - "BRCA1 is an important genetic regulator of breast cancer.", - "The STAT inflammatory pathway is often perturbed in cancers.", - "The EGFR pathway can lead to excessive growth of cancer cells.", - ] diff --git a/benchmark/results/end_to_end_query_generation.csv b/benchmark/results/end_to_end_query_generation.csv index fe0e171d..633a4e10 100644 --- a/benchmark/results/end_to_end_query_generation.csv +++ b/benchmark/results/end_to_end_query_generation.csv @@ -1,6 +1,7 @@ model_name,subtask,score,iterations +gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,6.0/8,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 +gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,5.0/8,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 diff --git a/benchmark/results/entity_selection.csv b/benchmark/results/entity_selection.csv index 5ccda2ae..43fd2db3 100644 --- a/benchmark/results/entity_selection.csv +++ b/benchmark/results/entity_selection.csv @@ -1,8 +1,10 @@ model_name,subtask,score,iterations +gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,2.0/2,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,2.0/2,2 +gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,2.0/2,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 +llama-2-chat:13:ggmlv3:q2_K,fe1d6c90419df7a4879f1db6bf2a6699_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv index f80c240f..3d62922c 100644 --- a/benchmark/results/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -1,10 +1,10 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,05ebbde9381a73f7fd240396d980fa7e_explicit_evaluation_yes,1.0/1,2 -gpt-3.5-turbo,14f39e6b45e89b2a94304b501c68e677_explicit_relevance_no,1.0/1,2 -gpt-3.5-turbo,2a0e00b42ab6392f2c4513c132bdbd37_explicit_relevance_yes,1.0/1,2 -gpt-3.5-turbo,3c6eb3b9fbbff548f8bb21b5f111b59c_explicit_evaluation_no,1.0/1,2 -gpt-3.5-turbo,a0b8c59680fc4c100e9a64f7a0951900_explicit_relevance_no,1.0/1,2 -gpt-3.5-turbo,dee154d0b9e963bc484c87c885d7c7bb_explicit_relevance_yes,1.0/1,2 +gpt-3.5-turbo,2181d4e6c5cb2e08c440ea1fb1e656b1_explicit_relevance_yes,1.0/1,2 +gpt-3.5-turbo,3e47253bf5b263d4776c5ff16fc02f12_explicit_evaluation_yes,1.0/1,2 +gpt-3.5-turbo,4d2b9088df3dac7705e3e0ea4f774a55_explicit_evaluation_no,1.0/1,2 +gpt-3.5-turbo,8eaeeb846a23455661163a6aff503bd9_explicit_relevance_no,1.0/1,2 +gpt-3.5-turbo,c62b234f4d4e8841b405d3389a32cbcf_explicit_relevance_yes,1.0/1,2 +gpt-3.5-turbo,eeb3f60d196c661902d99c37b403def0_explicit_relevance_no,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv index 7ca10490..f1f10b8b 100644 --- a/benchmark/results/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/implicit_relevance_of_multiple_fragments.csv @@ -1,6 +1,6 @@ model_name,subtask,score,iterations -gpt-3.5-turbo,53fa854b6b03471b357f338c3883ef82_implicit_relevance_yes,1.0/1,2 -gpt-3.5-turbo,e57da77bdf7275c183b12bfb62c2002a_implicit_relevance_no,1.0/1,2 +gpt-3.5-turbo,828dd99e7080411673c30e5b16d777da_implicit_relevance_no,1.0/1,2 +gpt-3.5-turbo,a4aecd18bfc529901926ce7da7cf0048_implicit_relevance_yes,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 llama-2-chat:13:ggmlv3:q3_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 llama-2-chat:13:ggmlv3:q4_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 diff --git a/benchmark/results/property_exists.csv b/benchmark/results/property_exists.csv index 48e07cd1..c997545e 100644 --- a/benchmark/results/property_exists.csv +++ b/benchmark/results/property_exists.csv @@ -1,6 +1,7 @@ model_name,subtask,score,iterations +gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,2.0/2,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,2.0/2,2 +gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,2.0/2,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/0,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/0,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 diff --git a/benchmark/results/property_selection.csv b/benchmark/results/property_selection.csv index a12e0d85..c404718c 100644 --- a/benchmark/results/property_selection.csv +++ b/benchmark/results/property_selection.csv @@ -1,6 +1,7 @@ model_name,subtask,score,iterations +gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,5.0/7,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/7,2 -gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,6.0/10,2 +gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,6.0/10,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 diff --git a/benchmark/results/query_generation.csv b/benchmark/results/query_generation.csv index 118d463a..647e0af7 100644 --- a/benchmark/results/query_generation.csv +++ b/benchmark/results/query_generation.csv @@ -1,6 +1,7 @@ model_name,subtask,score,iterations +gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,6.0/8,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 +gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,5.0/8,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 diff --git a/benchmark/results/relationship_selection.csv b/benchmark/results/relationship_selection.csv index 9f340f1c..ad53769c 100644 --- a/benchmark/results/relationship_selection.csv +++ b/benchmark/results/relationship_selection.csv @@ -1,8 +1,10 @@ model_name,subtask,score,iterations +gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,3.0/3,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,3.0/3,2 -gpt-3.5-turbo,72434e7a340a3f6dd047b944988491b7_single_word,3.0/3,2 +gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,3.0/3,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +llama-2-chat:13:ggmlv3:q2_K,fe1d6c90419df7a4879f1db6bf2a6699_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 diff --git a/benchmark/test_biocypher_query_generation.py b/benchmark/test_biocypher_query_generation.py index fedafd98..66204c60 100644 --- a/benchmark/test_biocypher_query_generation.py +++ b/benchmark/test_biocypher_query_generation.py @@ -41,18 +41,20 @@ def test_entity_selection( task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) + prompt_engine = get_prompt_engine( + yaml_data["input"]["kg_path"], prompt_engine + ) def run_test(): conversation.reset() # needs to be reset for each test success = prompt_engine._select_entities( - question=yaml_data["prompt"], + question=yaml_data["input"]["prompt"], conversation=conversation, ) assert success score = [] - for expected_entity in yaml_data["entities"]: + for expected_entity in yaml_data["expected"]["entities"]: score.append(expected_entity in prompt_engine.selected_entities) return calculate_test_score(score) @@ -78,10 +80,12 @@ def test_relationship_selection( task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) + prompt_engine = get_prompt_engine( + yaml_data["input"]["kg_path"], prompt_engine + ) - prompt_engine.question = yaml_data["prompt"] - prompt_engine.selected_entities = yaml_data["entities"] + prompt_engine.question = yaml_data["input"]["prompt"] + prompt_engine.selected_entities = yaml_data["expected"]["entities"] # TODO: more generic, for nested structures @@ -91,7 +95,7 @@ def run_test(): assert success score = [] - for expected_relationship_label_key in yaml_data[ + for expected_relationship_label_key in yaml_data["expected"][ "relationship_labels" ].keys(): score.append( @@ -99,7 +103,7 @@ def run_test(): in prompt_engine.selected_relationship_labels.keys() ) - for expected_relationship_label_value in yaml_data[ + for expected_relationship_label_value in yaml_data["expected"][ "relationship_labels" ][expected_relationship_label_key]: try: @@ -135,11 +139,15 @@ def test_property_selection( task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) + prompt_engine = get_prompt_engine( + yaml_data["input"]["kg_path"], prompt_engine + ) - prompt_engine.question = yaml_data["prompt"] - prompt_engine.selected_entities = yaml_data["entities"] - prompt_engine.selected_relationships = yaml_data["relationships"] + prompt_engine.question = yaml_data["input"]["prompt"] + prompt_engine.selected_entities = yaml_data["expected"]["entities"] + prompt_engine.selected_relationships = yaml_data["expected"][ + "relationships" + ] def run_test(): conversation.reset() # needs to be reset for each test @@ -147,7 +155,9 @@ def run_test(): if success: score = [] - for expected_property_key in yaml_data["properties"].keys(): + for expected_property_key in yaml_data["expected"][ + "properties" + ].keys(): try: score.append( expected_property_key @@ -156,9 +166,9 @@ def run_test(): except KeyError: score.append(False) - for expected_property_value in yaml_data["properties"][ - expected_property_key - ]: + for expected_property_value in yaml_data["expected"][ + "properties" + ][expected_property_key]: try: score.append( expected_property_value @@ -169,7 +179,7 @@ def run_test(): except KeyError: score.append(False) else: - score = [False for _ in yaml_data["properties"].keys()] + score = [False for _ in yaml_data["expected"]["properties"].keys()] return calculate_test_score(score) @@ -195,21 +205,23 @@ def test_query_generation( task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) + prompt_engine = get_prompt_engine( + yaml_data["input"]["kg_path"], prompt_engine + ) def run_test(): conversation.reset() # needs to be reset for each test query = prompt_engine._generate_query( - question=yaml_data["prompt"], - entities=yaml_data["entities"], - relationships=yaml_data["relationship_labels"], - properties=yaml_data["properties"], + question=yaml_data["input"]["prompt"], + entities=yaml_data["expected"]["entities"], + relationships=yaml_data["expected"]["relationship_labels"], + properties=yaml_data["expected"]["properties"], query_language="Cypher", conversation=conversation, ) score = [] - for expected_part_of_query in yaml_data["parts_of_query"]: + for expected_part_of_query in yaml_data["expected"]["parts_of_query"]: if isinstance(expected_part_of_query, tuple): score.append( expected_part_of_query[0] in query @@ -243,17 +255,21 @@ def test_end_to_end_query_generation( task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) + prompt_engine = get_prompt_engine( + yaml_data["input"]["kg_path"], prompt_engine + ) def run_test(): conversation.reset() # needs to be reset for each test try: query = prompt_engine.generate_query( - question=yaml_data["prompt"], + question=yaml_data["input"]["prompt"], query_language="Cypher", ) score = [] - for expected_part_of_query in yaml_data["parts_of_query"]: + for expected_part_of_query in yaml_data["expected"][ + "parts_of_query" + ]: if isinstance(expected_part_of_query, tuple): score.append( expected_part_of_query[0] in query @@ -264,7 +280,7 @@ def run_test(): (re.search(expected_part_of_query, query) is not None) ) except ValueError as e: - score = [False for _ in yaml_data["parts_of_query"]] + score = [False for _ in yaml_data["expected"]["parts_of_query"]] return calculate_test_score(score) @@ -373,15 +389,17 @@ def test_property_exists( task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" skip_if_already_run(model_name=model_name, task=task, subtask=subtask) - prompt_engine = get_prompt_engine(yaml_data["kg_path"], prompt_engine) + prompt_engine = get_prompt_engine( + yaml_data["input"]["kg_path"], prompt_engine + ) def run_test(): conversation.reset() # needs to be reset for each test query = prompt_engine._generate_query( - question=yaml_data["prompt"], - entities=yaml_data["entities"], - relationships=yaml_data["relationship_labels"], - properties=yaml_data["properties"], + question=yaml_data["input"]["prompt"], + entities=yaml_data["expected"]["entities"], + relationships=yaml_data["expected"]["relationship_labels"], + properties=yaml_data["expected"]["properties"], query_language="Cypher", conversation=conversation, ) diff --git a/benchmark/test_rag_interpretation.py b/benchmark/test_rag_interpretation.py index ccf93ac5..136e7446 100644 --- a/benchmark/test_rag_interpretation.py +++ b/benchmark/test_rag_interpretation.py @@ -29,10 +29,10 @@ def run_test(): conversation.reset() # needs to be reset for each test [ conversation.append_system_message(m) - for m in test_data_rag_interpretation["system_messages"] + for m in test_data_rag_interpretation["input"]["system_messages"] ] response, _, _ = conversation.query( - test_data_rag_interpretation["prompt"] + test_data_rag_interpretation["input"]["prompt"] ) # lower case, remove punctuation @@ -42,7 +42,9 @@ def run_test(): score = [] - score.append(response == test_data_rag_interpretation["answer"]) + score.append( + response == test_data_rag_interpretation["expected"]["answer"] + ) return calculate_test_score(score) @@ -76,15 +78,15 @@ def run_test(): conversation.reset() # needs to be reset for each test [ conversation.append_system_message(m) - for m in test_data_rag_interpretation["system_messages"] + for m in test_data_rag_interpretation["input"]["system_messages"] ] response, _, _ = conversation.query( - test_data_rag_interpretation["prompt"] + test_data_rag_interpretation["input"]["prompt"] ) msg = ( "You will receive a statement as an answer to this question: " - f"{test_data_rag_interpretation['prompt']} " + f"{test_data_rag_interpretation['input']['prompt']} " "If the statement is an answer to the question, please type 'answer'. " "If the statement declines to answer to the question or apologises, giving the reason of lack of relevance of the given text fragments, please type 'decline'. " "Do not type anything except these two options. Here is the statement: " @@ -102,7 +104,7 @@ def run_test(): score = ( [True] - if eval == test_data_rag_interpretation["expected_behaviour"] + if eval == test_data_rag_interpretation["expected"]["behaviour"] else [False] ) From b752893da5b12fa81a8063b20a2946d6d951d9ca Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Fri, 2 Feb 2024 17:52:27 +0100 Subject: [PATCH 17/32] run gpt4 --- .../results/end_to_end_query_generation.csv | 2 + benchmark/results/entity_selection.csv | 2 + ...explicit_relevance_of_single_fragments.csv | 6 +++ ...plicit_relevance_of_multiple_fragments.csv | 2 + .../end_to_end_query_generation.csv | 41 ++++++++++++------- .../entity_selection.csv | 25 ++++++++--- ...explicit_relevance_of_single_fragments.csv | 39 ++++++++++++------ ...plicit_relevance_of_multiple_fragments.csv | 27 ++++++++---- .../overview-aggregated.csv | 41 ++++++++++++------- .../preprocessed_for_frontend/overview.csv | 41 ++++++++++++------- .../property_exists.csv | 35 +++++++++++----- .../property_selection.csv | 33 ++++++++++----- .../query_generation.csv | 41 ++++++++++++------- .../relationship_selection.csv | 29 +++++++++---- benchmark/results/property_exists.csv | 2 + benchmark/results/property_selection.csv | 2 + benchmark/results/query_generation.csv | 2 + benchmark/results/relationship_selection.csv | 2 + 18 files changed, 261 insertions(+), 111 deletions(-) diff --git a/benchmark/results/end_to_end_query_generation.csv b/benchmark/results/end_to_end_query_generation.csv index 633a4e10..d5c9fc36 100644 --- a/benchmark/results/end_to_end_query_generation.csv +++ b/benchmark/results/end_to_end_query_generation.csv @@ -2,6 +2,8 @@ model_name,subtask,score,iterations gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,6.0/8,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,5.0/8,2 +gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,5.5/8,2 +gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,6.0/8,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 diff --git a/benchmark/results/entity_selection.csv b/benchmark/results/entity_selection.csv index 43fd2db3..7e81a7dc 100644 --- a/benchmark/results/entity_selection.csv +++ b/benchmark/results/entity_selection.csv @@ -2,6 +2,8 @@ model_name,subtask,score,iterations gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,2.0/2,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,2.0/2,2 +gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,2.0/2,2 +gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,2.0/2,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 llama-2-chat:13:ggmlv3:q2_K,fe1d6c90419df7a4879f1db6bf2a6699_single_word,0.0/2,2 diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv index 3d62922c..6f4aa3ae 100644 --- a/benchmark/results/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -5,6 +5,12 @@ gpt-3.5-turbo,4d2b9088df3dac7705e3e0ea4f774a55_explicit_evaluation_no,1.0/1,2 gpt-3.5-turbo,8eaeeb846a23455661163a6aff503bd9_explicit_relevance_no,1.0/1,2 gpt-3.5-turbo,c62b234f4d4e8841b405d3389a32cbcf_explicit_relevance_yes,1.0/1,2 gpt-3.5-turbo,eeb3f60d196c661902d99c37b403def0_explicit_relevance_no,1.0/1,2 +gpt-4,2181d4e6c5cb2e08c440ea1fb1e656b1_explicit_relevance_yes,1.0/1,2 +gpt-4,3e47253bf5b263d4776c5ff16fc02f12_explicit_evaluation_yes,1.0/1,2 +gpt-4,4d2b9088df3dac7705e3e0ea4f774a55_explicit_evaluation_no,1.0/1,2 +gpt-4,8eaeeb846a23455661163a6aff503bd9_explicit_relevance_no,1.0/1,2 +gpt-4,c62b234f4d4e8841b405d3389a32cbcf_explicit_relevance_yes,1.0/1,2 +gpt-4,eeb3f60d196c661902d99c37b403def0_explicit_relevance_no,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv index f1f10b8b..04368c1b 100644 --- a/benchmark/results/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/implicit_relevance_of_multiple_fragments.csv @@ -1,6 +1,8 @@ model_name,subtask,score,iterations gpt-3.5-turbo,828dd99e7080411673c30e5b16d777da_implicit_relevance_no,1.0/1,2 gpt-3.5-turbo,a4aecd18bfc529901926ce7da7cf0048_implicit_relevance_yes,1.0/1,2 +gpt-4,828dd99e7080411673c30e5b16d777da_implicit_relevance_no,1.0/1,2 +gpt-4,a4aecd18bfc529901926ce7da7cf0048_implicit_relevance_yes,1.0/1,2 llama-2-chat:13:ggmlv3:q2_K,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 llama-2-chat:13:ggmlv3:q3_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 llama-2-chat:13:ggmlv3:q4_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 diff --git a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv index 37990071..5886ed55 100644 --- a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv @@ -1,15 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-3.5-turbo,14.0,14.0,1.0,2 -gpt-4,14.0,14.0,1.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,14.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_0,0.0,14.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_0,0.0,14.0,0.0,2 -llama-2-chat:13:ggmlv3:q8_0,0.0,14.0,0.0,2 -llama-2-chat:7:ggmlv3:q2_K,0.0,14.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,14.0,0.0,2 -llama-2-chat:7:ggmlv3:q5_0,0.0,14.0,0.0,2 -llama-2-chat:7:ggmlv3:q8_0,0.0,14.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,14.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,14.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,14.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,14.0,0.0,2 +gpt-4,11.5,16.0,0.71875,2 +gpt-3.5-turbo,17.0,24.0,0.7083333333333334,2 +llama-2-chat:7:ggmlv3:q4_0,0.0,16.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,16.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,16.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,16.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q8_0,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q6_K,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q5_K_M,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q5_0,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_K_S,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_K_M,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_1,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q3_K_M,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q2_K,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q8_0,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q6_K,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q5_K_M,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q5_0,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_S,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_M,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_1,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_0,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,16.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,16.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/entity_selection.csv b/benchmark/results/preprocessed_for_frontend/entity_selection.csv index 19be5640..669d54f0 100644 --- a/benchmark/results/preprocessed_for_frontend/entity_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/entity_selection.csv @@ -1,15 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-3.5-turbo,4.0,4.0,1.0,2 +gpt-3.5-turbo,6.0,6.0,1.0,2 gpt-4,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q2_K,2.0,4.0,0.5,2 +llama-2-chat:7:ggmlv3:q4_K_M,3.0,4.0,0.75,2 +llama-2-chat:7:ggmlv3:q3_K_M,3.0,4.0,0.75,2 +llama-2-chat:7:ggmlv3:q4_1,2.0,4.0,0.5,2 llama-2-chat:7:ggmlv3:q4_0,2.0,4.0,0.5,2 -llama-2-chat:7:ggmlv3:q5_0,2.0,4.0,0.5,2 llama-2-chat:7:ggmlv3:q8_0,2.0,4.0,0.5,2 -llama-2-chat:13:ggmlv3:q4_0,1.0,4.0,0.25,2 +llama-2-chat:7:ggmlv3:q6_K,2.0,4.0,0.5,2 +llama-2-chat:7:ggmlv3:q5_K_M,2.0,4.0,0.5,2 +llama-2-chat:7:ggmlv3:q5_0,2.0,4.0,0.5,2 +llama-2-chat:7:ggmlv3:q4_K_S,2.0,4.0,0.5,2 +llama-2-chat:7:ggmlv3:q2_K,2.0,4.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.0,4.0,0.25,2 +llama-2-chat:13:ggmlv3:q4_0,1.0,4.0,0.25,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1.0,4.0,0.25,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_0,0.0,4.0,0.0,2 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,4.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_1,0.0,4.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q8_0,0.0,4.0,0.0,2 +llama-2-chat:13:ggmlv3:q6_K,0.0,4.0,0.0,2 +llama-2-chat:13:ggmlv3:q5_K_M,0.0,4.0,0.0,2 +llama-2-chat:13:ggmlv3:q5_0,0.0,4.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_S,0.0,4.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,4.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,4.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,4.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_M,0.0,4.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv index 7a92300f..c4b93710 100644 --- a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv @@ -1,15 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-3.5-turbo,3.0,4.0,0.75,2 -gpt-4,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_0,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_0,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q8_0,0.0,4.0,0.0,2 -llama-2-chat:7:ggmlv3:q2_K,0.0,4.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,4.0,0.0,2 -llama-2-chat:7:ggmlv3:q5_0,0.0,4.0,0.0,2 -llama-2-chat:7:ggmlv3:q8_0,0.0,4.0,0.0,2 +gpt-3.5-turbo,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q2_K,4.0,4.0,1.0,2 +llama-2-chat:7:ggmlv3:q8_0,4.0,4.0,1.0,2 +llama-2-chat:7:ggmlv3:q6_K,4.0,4.0,1.0,2 +llama-2-chat:7:ggmlv3:q5_K_M,4.0,4.0,1.0,2 +llama-2-chat:7:ggmlv3:q5_0,4.0,4.0,1.0,2 +llama-2-chat:7:ggmlv3:q4_K_S,4.0,4.0,1.0,2 +llama-2-chat:7:ggmlv3:q4_K_M,4.0,4.0,1.0,2 +llama-2-chat:7:ggmlv3:q4_1,4.0,4.0,1.0,2 +llama-2-chat:7:ggmlv3:q4_0,4.0,4.0,1.0,2 +gpt-4,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q3_K_M,4.0,4.0,1.0,2 +llama-2-chat:13:ggmlv3:q8_0,4.0,4.0,1.0,2 +llama-2-chat:13:ggmlv3:q6_K,4.0,4.0,1.0,2 +llama-2-chat:13:ggmlv3:q5_K_M,4.0,4.0,1.0,2 +llama-2-chat:13:ggmlv3:q5_0,4.0,4.0,1.0,2 +llama-2-chat:13:ggmlv3:q4_K_S,4.0,4.0,1.0,2 +llama-2-chat:13:ggmlv3:q4_K_M,4.0,4.0,1.0,2 +llama-2-chat:13:ggmlv3:q4_1,4.0,4.0,1.0,2 +llama-2-chat:13:ggmlv3:q4_0,4.0,4.0,1.0,2 +llama-2-chat:13:ggmlv3:q2_K,3.0,4.0,0.75,2 +llama-2-chat:13:ggmlv3:q3_K_M,2.0,4.0,0.5,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.0,4.0,0.5,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.0,4.0,0.5,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.0,4.0,0.25,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,4.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,4.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,4.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,4.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,4.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv index e5004b54..6c25aa98 100644 --- a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv @@ -1,15 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-3.5-turbo,1.0,1.0,1.0,2 -gpt-4,1.0,1.0,1.0,2 +gpt-3.5-turbo,2.0,2.0,1.0,2 +gpt-4,2.0,2.0,1.0,2 llama-2-chat:13:ggmlv3:q2_K,1.0,1.0,1.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.0,1.0,1.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,1.0,1.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1.0,1.0,1.0,2 -llama-2-chat:13:ggmlv3:q4_0,0.5,1.0,0.5,2 -llama-2-chat:13:ggmlv3:q5_0,0.5,1.0,0.5,2 -llama-2-chat:13:ggmlv3:q8_0,0.5,1.0,0.5,2 -llama-2-chat:7:ggmlv3:q5_0,0.5,1.0,0.5,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.0,1.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.5,1.0,0.5,2 +llama-2-chat:7:ggmlv3:q6_K,0.5,1.0,0.5,2 +llama-2-chat:7:ggmlv3:q5_K_M,0.5,1.0,0.5,2 +llama-2-chat:7:ggmlv3:q5_0,0.5,1.0,0.5,2 +llama-2-chat:7:ggmlv3:q4_K_M,0.5,1.0,0.5,2 +llama-2-chat:7:ggmlv3:q4_1,0.5,1.0,0.5,2 +llama-2-chat:7:ggmlv3:q3_K_M,0.5,1.0,0.5,2 +llama-2-chat:13:ggmlv3:q8_0,0.5,1.0,0.5,2 +llama-2-chat:13:ggmlv3:q6_K,0.5,1.0,0.5,2 +llama-2-chat:13:ggmlv3:q5_K_M,0.5,1.0,0.5,2 +llama-2-chat:13:ggmlv3:q5_0,0.5,1.0,0.5,2 +llama-2-chat:13:ggmlv3:q4_K_S,0.5,1.0,0.5,2 +llama-2-chat:13:ggmlv3:q4_1,0.5,1.0,0.5,2 +llama-2-chat:13:ggmlv3:q4_0,0.5,1.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.5,1.0,0.5,2 llama-2-chat:7:ggmlv3:q2_K,0.0,1.0,0.0,2 llama-2-chat:7:ggmlv3:q4_0,0.0,1.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_K_S,0.0,1.0,0.0,2 llama-2-chat:7:ggmlv3:q8_0,0.0,1.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_M,0.0,1.0,0.0,2 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,1.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv index 62dcc509..65db68b4 100644 --- a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv +++ b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv @@ -1,15 +1,28 @@ Model name,Mean -gpt-3.5-turbo,0.9246323529411764 -gpt-4,0.8411239495798319 -llama-2-chat:7:ggmlv3:q5_0,0.35714285714285715 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.3348214285714286 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.328125 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.296875 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.2901785714285714 -llama-2-chat:13:ggmlv3:q8_0,0.28273809523809523 -llama-2-chat:13:ggmlv3:q5_0,0.26785714285714285 -llama-2-chat:7:ggmlv3:q4_0,0.26339285714285715 -llama-2-chat:7:ggmlv3:q8_0,0.25297619047619047 -llama-2-chat:13:ggmlv3:q4_0,0.2455357142857143 -llama-2-chat:7:ggmlv3:q2_K,0.23030462184873948 -llama-2-chat:13:ggmlv3:q2_K,0.19642857142857142 +gpt-4,0.9002757352941176 +gpt-3.5-turbo,0.8854166666666667 +llama-2-chat:7:ggmlv3:q2_K,0.5372242647058824 +llama-2-chat:7:ggmlv3:q3_K_M,0.53515625 +llama-2-chat:7:ggmlv3:q5_0,0.4739583333333333 +llama-2-chat:13:ggmlv3:q5_0,0.4609375 +llama-2-chat:7:ggmlv3:q4_0,0.44140625 +llama-2-chat:7:ggmlv3:q4_K_M,0.43046875 +llama-2-chat:13:ggmlv3:q4_1,0.4296875 +llama-2-chat:7:ggmlv3:q5_K_M,0.42421875 +llama-2-chat:13:ggmlv3:q4_0,0.4140625 +llama-2-chat:7:ggmlv3:q6_K,0.403125 +llama-2-chat:7:ggmlv3:q4_1,0.403125 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.3984375 +llama-2-chat:13:ggmlv3:q5_K_M,0.3984375 +llama-2-chat:13:ggmlv3:q6_K,0.3776041666666667 +llama-2-chat:13:ggmlv3:q8_0,0.3776041666666667 +llama-2-chat:7:ggmlv3:q4_K_S,0.3697916666666667 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.359375 +llama-2-chat:13:ggmlv3:q4_K_S,0.3528645833333333 +llama-2-chat:7:ggmlv3:q8_0,0.34765625 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.3333333333333333 +llama-2-chat:13:ggmlv3:q2_K,0.33035714285714285 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.328125 +llama-2-chat:13:ggmlv3:q4_K_M,0.3151041666666667 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.3020833333333333 +llama-2-chat:13:ggmlv3:q3_K_M,0.23958333333333331 diff --git a/benchmark/results/preprocessed_for_frontend/overview.csv b/benchmark/results/preprocessed_for_frontend/overview.csv index 461da899..e33cccf2 100644 --- a/benchmark/results/preprocessed_for_frontend/overview.csv +++ b/benchmark/results/preprocessed_for_frontend/overview.csv @@ -1,15 +1,28 @@ Model name,property_selection,query_generation,explicit_relevance_of_single_fragments,entity_selection,relationship_selection,end_to_end_query_generation,implicit_relevance_of_multiple_fragments,property_exists,Mean -gpt-3.5-turbo,0.6470588235294118,1.0,0.75,1.0,1.0,1.0,1.0,1.0,0.9246323529411764 -gpt-4,0.7647058823529411,0.9642857142857144,0.0,1.0,1.0,1.0,1.0,1.0,0.8411239495798319 -llama-2-chat:7:ggmlv3:q5_0,0.0,0.8571428571428571,0.0,0.5,0.5,0.0,0.5,0.5,0.35714285714285715 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,0.9285714285714286,0.0,0.0,0.0,0.0,1.0,0.75,0.3348214285714286 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,1.0,0.0,0.25,0.25,0.0,0.5,0.625,0.328125 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,1.0,0.0,0.25,0.0,0.0,0.5,0.625,0.296875 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,0.8214285714285714,0.0,0.0,0.0,0.0,1.0,0.5,0.2901785714285714 -llama-2-chat:13:ggmlv3:q8_0,0.0,0.9285714285714286,0.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.28273809523809523 -llama-2-chat:13:ggmlv3:q5_0,0.0,0.8928571428571429,0.0,0.0,0.0,0.0,0.5,0.75,0.26785714285714285 -llama-2-chat:7:ggmlv3:q4_0,0.0,0.6071428571428571,0.0,0.5,0.0,0.0,0.0,1.0,0.26339285714285715 -llama-2-chat:7:ggmlv3:q8_0,0.0,0.8571428571428571,0.0,0.5,0.0,0.0,0.0,0.6666666666666666,0.25297619047619047 -llama-2-chat:13:ggmlv3:q4_0,0.0,0.7142857142857143,0.0,0.25,0.0,0.0,0.5,0.5,0.2455357142857143 -llama-2-chat:7:ggmlv3:q2_K,0.2352941176470588,0.6071428571428571,0.0,0.5,0.0,0.0,0.0,0.5,0.23030462184873948 -llama-2-chat:13:ggmlv3:q2_K,0.0,0.5714285714285714,0.0,0.0,0.0,0.0,1.0,0.0,0.19642857142857142 +gpt-4,0.7647058823529411,0.71875,1.0,1.0,1.0,0.71875,1.0,1.0,0.9002757352941176 +gpt-3.5-turbo,0.6666666666666666,0.7083333333333334,1.0,1.0,1.0,0.7083333333333334,1.0,1.0,0.8854166666666667 +llama-2-chat:7:ggmlv3:q2_K,0.2352941176470588,0.5625,1.0,0.5,0.0,0.0,0.0,2.0,0.5372242647058824 +llama-2-chat:7:ggmlv3:q3_K_M,0.0,0.65625,1.0,0.75,0.5,0.0,0.5,0.875,0.53515625 +llama-2-chat:7:ggmlv3:q5_0,0.0,0.625,1.0,0.5,0.5,0.0,0.5,0.6666666666666666,0.4739583333333333 +llama-2-chat:13:ggmlv3:q5_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.5,0.4609375 +llama-2-chat:7:ggmlv3:q4_0,0.0,0.53125,1.0,0.5,0.0,0.0,0.0,1.5,0.44140625 +llama-2-chat:7:ggmlv3:q4_K_M,0.0,0.59375,1.0,0.75,0.0,0.0,0.5,0.6,0.43046875 +llama-2-chat:13:ggmlv3:q4_1,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.25,0.4296875 +llama-2-chat:7:ggmlv3:q5_K_M,0.0,0.59375,1.0,0.5,0.0,0.0,0.5,0.8,0.42421875 +llama-2-chat:13:ggmlv3:q4_0,0.0,0.5625,1.0,0.25,0.0,0.0,0.5,1.0,0.4140625 +llama-2-chat:7:ggmlv3:q6_K,0.0,0.625,1.0,0.5,0.0,0.0,0.5,0.6,0.403125 +llama-2-chat:7:ggmlv3:q4_1,0.0,0.625,1.0,0.5,0.0,0.0,0.5,0.6,0.403125 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,0.6875,0.0,0.0,0.0,0.0,1.0,1.5,0.3984375 +llama-2-chat:13:ggmlv3:q5_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.0,0.3984375 +llama-2-chat:13:ggmlv3:q6_K,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667 +llama-2-chat:13:ggmlv3:q8_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667 +llama-2-chat:7:ggmlv3:q4_K_S,0.0,0.625,1.0,0.5,0.0,0.0,0.0,0.8333333333333334,0.3697916666666667 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,0.75,0.5,0.0,0.0,0.0,1.0,0.625,0.359375 +llama-2-chat:13:ggmlv3:q4_K_S,0.0,0.65625,1.0,0.0,0.0,0.0,0.5,0.6666666666666666,0.3528645833333333 +llama-2-chat:7:ggmlv3:q8_0,0.0,0.65625,1.0,0.5,0.0,0.0,0.0,0.625,0.34765625 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,0.75,0.25,0.25,0.25,0.0,0.5,0.6666666666666666,0.3333333333333333 +llama-2-chat:13:ggmlv3:q2_K,0.0,0.5625,0.75,0.0,0.0,0.0,1.0,,0.33035714285714285 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,0.75,0.5,0.25,0.0,0.0,0.5,0.625,0.328125 +llama-2-chat:13:ggmlv3:q4_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.0,0.8333333333333334,0.3151041666666667 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,0.75,0.0,0.0,0.0,0.0,1.0,0.6666666666666666,0.3020833333333333 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,0.75,0.5,0.0,0.0,0.0,0.0,0.6666666666666666,0.23958333333333331 diff --git a/benchmark/results/preprocessed_for_frontend/property_exists.csv b/benchmark/results/preprocessed_for_frontend/property_exists.csv index 713bcd89..3051bfc2 100644 --- a/benchmark/results/preprocessed_for_frontend/property_exists.csv +++ b/benchmark/results/preprocessed_for_frontend/property_exists.csv @@ -1,15 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-3.5-turbo,4.0,4.0,1.0,2 +llama-2-chat:7:ggmlv3:q2_K,2.0,1.0,2.0,2 +llama-2-chat:7:ggmlv3:q4_0,3.0,2.0,1.5,2 +llama-2-chat:13:ggmlv3:q5_0,1.5,1.0,1.5,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.5,1.0,1.5,2 +llama-2-chat:13:ggmlv3:q4_1,2.5,2.0,1.25,2 gpt-4,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_0,3.0,3.0,1.0,2 +llama-2-chat:13:ggmlv3:q5_K_M,3.0,3.0,1.0,2 +gpt-3.5-turbo,6.0,6.0,1.0,2 +llama-2-chat:13:ggmlv3:q4_0,1.0,1.0,1.0,2 +llama-2-chat:7:ggmlv3:q3_K_M,3.5,4.0,0.875,2 llama-2-chat:13:ggmlv3:q8_0,2.5,3.0,0.8333333333333334,2 -llama-2-chat:13:ggmlv3:q5_0,1.5,2.0,0.75,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,3.0,4.0,0.75,2 -llama-2-chat:7:ggmlv3:q8_0,2.0,3.0,0.6666666666666666,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.5,4.0,0.625,2 +llama-2-chat:13:ggmlv3:q4_K_M,2.5,3.0,0.8333333333333334,2 +llama-2-chat:7:ggmlv3:q4_K_S,2.5,3.0,0.8333333333333334,2 +llama-2-chat:13:ggmlv3:q6_K,2.5,3.0,0.8333333333333334,2 +llama-2-chat:7:ggmlv3:q5_K_M,4.0,5.0,0.8,2 +llama-2-chat:13:ggmlv3:q4_K_S,2.0,3.0,0.6666666666666666,2 +llama-2-chat:13:ggmlv3:q3_K_M,2.0,3.0,0.6666666666666666,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,3.0,0.6666666666666666,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,3.0,0.6666666666666666,2 +llama-2-chat:7:ggmlv3:q5_0,2.0,3.0,0.6666666666666666,2 +llama-2-chat:7:ggmlv3:q8_0,2.5,4.0,0.625,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.5,4.0,0.625,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.5,4.0,0.625,2 -llama-2-chat:13:ggmlv3:q4_0,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q2_K,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q5_0,2.5,5.0,0.5,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.0,2.0,0.5,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,2.0,0.0,2 +llama-2-chat:7:ggmlv3:q6_K,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q4_K_M,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q4_1,3.0,5.0,0.6,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,0.0,,2 diff --git a/benchmark/results/preprocessed_for_frontend/property_selection.csv b/benchmark/results/preprocessed_for_frontend/property_selection.csv index 9301f211..765c4336 100644 --- a/benchmark/results/preprocessed_for_frontend/property_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/property_selection.csv @@ -1,15 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-4,13.0,17.0,0.7647058823529411,2 -gpt-3.5-turbo,11.0,17.0,0.6470588235294118,2 +gpt-3.5-turbo,16.0,24.0,0.6666666666666666,2 llama-2-chat:7:ggmlv3:q2_K,4.0,17.0,0.23529411764705882,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q5_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q8_0,0.0,6.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_1,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q8_0,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q6_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q5_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q5_0,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_K_S,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q6_K,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q5_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q5_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_S,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_1,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/query_generation.csv b/benchmark/results/preprocessed_for_frontend/query_generation.csv index 926977f5..975132fe 100644 --- a/benchmark/results/preprocessed_for_frontend/query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/query_generation.csv @@ -1,15 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-3.5-turbo,14.0,14.0,1.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,14.0,14.0,1.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,14.0,14.0,1.0,2 -gpt-4,13.5,14.0,0.9642857142857143,2 -llama-2-chat:13:ggmlv3:q8_0,13.0,14.0,0.9285714285714286,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,13.0,14.0,0.9285714285714286,2 -llama-2-chat:13:ggmlv3:q5_0,12.5,14.0,0.8928571428571429,2 -llama-2-chat:7:ggmlv3:q5_0,12.0,14.0,0.8571428571428571,2 -llama-2-chat:7:ggmlv3:q8_0,12.0,14.0,0.8571428571428571,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,11.5,14.0,0.8214285714285714,2 -llama-2-chat:13:ggmlv3:q4_0,10.0,14.0,0.7142857142857143,2 -llama-2-chat:7:ggmlv3:q2_K,8.5,14.0,0.6071428571428571,2 -llama-2-chat:7:ggmlv3:q4_0,8.5,14.0,0.6071428571428571,2 -llama-2-chat:13:ggmlv3:q2_K,8.0,14.0,0.5714285714285714,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,12.0,16.0,0.75,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,12.0,16.0,0.75,2 +llama-2-chat:13:ggmlv3:q3_K_M,12.0,16.0,0.75,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,12.0,16.0,0.75,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,12.0,16.0,0.75,2 +gpt-4,11.5,16.0,0.71875,2 +gpt-3.5-turbo,17.0,24.0,0.7083333333333334,2 +llama-2-chat:13:ggmlv3:q5_0,11.0,16.0,0.6875,2 +llama-2-chat:13:ggmlv3:q5_K_M,11.0,16.0,0.6875,2 +llama-2-chat:13:ggmlv3:q6_K,11.0,16.0,0.6875,2 +llama-2-chat:13:ggmlv3:q8_0,11.0,16.0,0.6875,2 +llama-2-chat:13:ggmlv3:q4_1,11.0,16.0,0.6875,2 +llama-2-chat:13:ggmlv3:q4_K_M,11.0,16.0,0.6875,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,11.0,16.0,0.6875,2 +llama-2-chat:13:ggmlv3:q4_K_S,10.5,16.0,0.65625,2 +llama-2-chat:7:ggmlv3:q8_0,10.5,16.0,0.65625,2 +llama-2-chat:7:ggmlv3:q3_K_M,10.5,16.0,0.65625,2 +llama-2-chat:7:ggmlv3:q5_0,10.0,16.0,0.625,2 +llama-2-chat:7:ggmlv3:q6_K,10.0,16.0,0.625,2 +llama-2-chat:7:ggmlv3:q4_1,10.0,16.0,0.625,2 +llama-2-chat:7:ggmlv3:q4_K_S,10.0,16.0,0.625,2 +llama-2-chat:7:ggmlv3:q4_K_M,9.5,16.0,0.59375,2 +llama-2-chat:7:ggmlv3:q5_K_M,9.5,16.0,0.59375,2 +llama-2-chat:7:ggmlv3:q2_K,9.0,16.0,0.5625,2 +llama-2-chat:13:ggmlv3:q4_0,9.0,16.0,0.5625,2 +llama-2-chat:13:ggmlv3:q2_K,9.0,16.0,0.5625,2 +llama-2-chat:7:ggmlv3:q4_0,8.5,16.0,0.53125,2 diff --git a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv index 70ef643b..aa7c53e8 100644 --- a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv @@ -1,15 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-3.5-turbo,6.0,6.0,1.0,2 +gpt-3.5-turbo,9.0,9.0,1.0,2 gpt-4,6.0,6.0,1.0,2 llama-2-chat:7:ggmlv3:q5_0,3.0,6.0,0.5,2 +llama-2-chat:7:ggmlv3:q3_K_M,3.0,6.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.5,6.0,0.25,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q2_K,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q8_0,0.0,6.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_K_M,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q8_0,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q6_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q5_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,9.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_K_S,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_1,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_1,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q2_K,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q6_K,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q5_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q5_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_S,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2 diff --git a/benchmark/results/property_exists.csv b/benchmark/results/property_exists.csv index c997545e..ea12ff73 100644 --- a/benchmark/results/property_exists.csv +++ b/benchmark/results/property_exists.csv @@ -2,6 +2,8 @@ model_name,subtask,score,iterations gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,2.0/2,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,2.0/2,2 +gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,2.0/2,2 +gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,2.0/2,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/0,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/0,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 diff --git a/benchmark/results/property_selection.csv b/benchmark/results/property_selection.csv index c404718c..691fd9e4 100644 --- a/benchmark/results/property_selection.csv +++ b/benchmark/results/property_selection.csv @@ -2,6 +2,8 @@ model_name,subtask,score,iterations gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,5.0/7,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/7,2 gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,6.0/10,2 +gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,7.0/7,2 +gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,6.0/10,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 diff --git a/benchmark/results/query_generation.csv b/benchmark/results/query_generation.csv index 647e0af7..064a87e6 100644 --- a/benchmark/results/query_generation.csv +++ b/benchmark/results/query_generation.csv @@ -2,6 +2,8 @@ model_name,subtask,score,iterations gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,6.0/8,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,5.0/8,2 +gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,5.5/8,2 +gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,6.0/8,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 diff --git a/benchmark/results/relationship_selection.csv b/benchmark/results/relationship_selection.csv index ad53769c..fe5d0069 100644 --- a/benchmark/results/relationship_selection.csv +++ b/benchmark/results/relationship_selection.csv @@ -2,6 +2,8 @@ model_name,subtask,score,iterations gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,3.0/3,2 gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,3.0/3,2 gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,3.0/3,2 +gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,3.0/3,2 +gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,3.0/3,2 llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 llama-2-chat:13:ggmlv3:q2_K,fe1d6c90419df7a4879f1db6bf2a6699_single_word,0.0/3,2 From 512492e260cc5fdec6d95082f44592af45c02033 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Sat, 3 Feb 2024 16:36:24 +0100 Subject: [PATCH 18/32] multi-input test cases --- benchmark/data/benchmark_data.yaml | 98 +++++++++++++++--------------- 1 file changed, 50 insertions(+), 48 deletions(-) diff --git a/benchmark/data/benchmark_data.yaml b/benchmark/data/benchmark_data.yaml index 690e2bf6..725bf167 100644 --- a/benchmark/data/benchmark_data.yaml +++ b/benchmark/data/benchmark_data.yaml @@ -1,12 +1,18 @@ -# could make sense to divide in general categories, e.g.: -# file: test_schema_info.yaml (optional) -# input: prompt (user question, system messages) -# expected: entities etc. -# assertion method? (string to evaluate, e.g. "assert 'Gene' in entities") +# Top-level keys: benchmark modules +# Values: list of dictionaries, each containing a test case +# +# Test case keys: +# - input (for creating the test) +# - expected (for asserting ourcomes and generating a score) +# - case (for categorizing the test case) +# +# If any input is a dictionary itself, it will be expanded into separate test +# cases, using the top-level key to create a concatenated test case purpose. biocypher_query_generation: - - input: + - case: single_word + input: kg_path: test_schema_info.yaml - prompt: Which genes are associated with mucoviscidosis? + prompt: Which genes are associated with mucoviscidosis ? expected: entities: ["Gene", "Disease"] relationships: ["GeneToPhenotypeAssociation"] @@ -36,8 +42,9 @@ biocypher_query_generation: "WHERE", "{name:}", ] - test_case_purpose: single_word - - input: + + - case: multi_word + input: kg_path: test_schema_info.yaml prompt: Which genes are expressed in fibroblasts? expected: @@ -63,50 +70,43 @@ biocypher_query_generation: "WHERE", "{name:}", ] - test_case_purpose: multi_word + rag_interpretation: - - input: + - case: explicit_relevance_no + input: prompt: Which molecular pathways are associated with cancer? system_messages: - [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", - "The earth is a globe.", - ] + simple: + [ + "You will receive a text fragment to help answer the user's question. Your task is to judge this text fragment for relevance to the user's question, and return either 'yes' or 'no'! Here is the fragment: ", + "The earth is a globe.", + ] + more_explicit: + [ + "You will receive a text fragment to help answer the user's question. Your task is to judge this text fragment for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", + "The earth is a globe.", + ] + repeat_instruction: + [ + "You will receive a text fragment to help answer the user's question. You should only respond with 'yes' or 'no' without additional words. Your task is to judge this text fragment for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", + "The earth is a globe.", + ] expected: answer: "no" - test_case_purpose: explicit_relevance_no - - input: + + - case: explicit_relevance_yes + input: prompt: Which molecular pathways are associated with cancer? system_messages: [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", - "TP53 is important in the regulation of cellular death.", + "You will receive a text fragment to help answer the user's question. Your task is to judge this text fragment for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", "The EGFR pathway is deregulated in a number of cancers.", ] expected: answer: "yes" - test_case_purpose: explicit_relevance_yes - - input: - prompt: Which molecular pathways are associated with cancer? - system_messages: - [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", - "The EGFR pathway is deregulated in a number of cancers.", - ] - expected: - answer: "yes" - test_case_purpose: explicit_relevance_yes - - input: - prompt: Which molecular pathways are associated with cancer? - system_messages: - [ - "You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'; only respond with one word, do not offer explanation or justification! Here is the fragment: ", - "The Human is the most endurant mammal.", - ] - expected: - answer: "no" - test_case_purpose: explicit_relevance_no - - input: + + - case: explicit_evaluation_no + input: prompt: "I'm sorry, but the given text fragments do not provide any relevant information about molecular pathways associated with cancer." system_messages: [ @@ -118,8 +118,9 @@ rag_interpretation: ] expected: answer: "decline" - test_case_purpose: explicit_evaluation_no - - input: + + - case: explicit_evaluation_yes + input: prompt: "There are several molecular pathways that are associated with cancer, for instance TP53, BRCA1, the STAT inflammatory pathway, and the EGFR pathway." system_messages: [ @@ -131,8 +132,9 @@ rag_interpretation: ] expected: answer: "answer" - test_case_purpose: explicit_evaluation_yes - - input: + + - case: implicit_relevance_no + input: prompt: Which molecular pathways are associated with cancer? system_messages: [ @@ -144,8 +146,9 @@ rag_interpretation: ] expected: behaviour: "decline" - test_case_purpose: implicit_relevance_no - - input: + + - case: implicit_relevance_yes + input: prompt: Which molecular pathways are associated with cancer? system_messages: [ @@ -157,4 +160,3 @@ rag_interpretation: ] expected: behaviour: "answer" - test_case_purpose: implicit_relevance_yes From 9050073cb02a96ee414cdab20bb9403b309f68e8 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Sat, 3 Feb 2024 16:36:52 +0100 Subject: [PATCH 19/32] expand test cases with multiple inputs --- benchmark/load_dataset.py | 89 ++++++++++++++++++++++++--------------- 1 file changed, 54 insertions(+), 35 deletions(-) diff --git a/benchmark/load_dataset.py b/benchmark/load_dataset.py index f6c34fa3..38aa4e34 100644 --- a/benchmark/load_dataset.py +++ b/benchmark/load_dataset.py @@ -55,29 +55,6 @@ def _load_test_data_from_this_repository(): directory_path = "./benchmark/data" files_in_directory = _get_all_files(directory_path) - # old csv implementation - test_data_csv = {} - for file_path in files_in_directory: - if file_path.endswith(".csv"): - df = pd.read_csv(file_path, sep=";") - _apply_literal_eval( - df, - [ - "entities", - "relationships", - "relationship_labels", - "properties", - "parts_of_query", - "system_messages", - ], - ) - test_data_csv[file_path.replace("./benchmark/", "./")] = df - elif file_path.endswith(".yaml"): - test_data_csv[file_path.replace("./benchmark/", "./")] = ( - yaml.safe_load(file_path) - ) - - # new yaml implementation test_data = {} for file_path in files_in_directory: if file_path.endswith(".yaml"): @@ -85,19 +62,12 @@ def _load_test_data_from_this_repository(): try: yaml_data = yaml.safe_load(stream) - # every dictionary in the list of dictionaries that is under - # any top level key gets a hash field that is the md5 hash - # of the dictionary + if "_data" in file_path: + # expand multi-instruction tests + yaml_data = _expand_multi_instruction(yaml_data) - for key in yaml_data.keys(): - if isinstance(yaml_data[key], list): - for i in range(len(yaml_data[key])): - if isinstance(yaml_data[key][i], dict): - yaml_data[key][i]["hash"] = hashlib.md5( - json.dumps(yaml_data[key][i]).encode( - "utf-8" - ) - ).hexdigest() + # generate hash for each case + yaml_data = _hash_each_case(yaml_data) test_data[file_path.replace("./benchmark/", "./")] = ( yaml_data @@ -109,6 +79,55 @@ def _load_test_data_from_this_repository(): return test_data +def _hash_each_case(data_dict: dict) -> dict: + """ + Create a hash for each case in the test data to identify tests that have + been run or modified. + + Args: + data_dict (dict): The yaml data. + + Returns: + dict: The yaml data with a hash for each case. + """ + for key in data_dict.keys(): + if isinstance(data_dict[key], list): + for i in range(len(data_dict[key])): + if isinstance(data_dict[key][i], dict): + data_dict[key][i]["hash"] = hashlib.md5( + json.dumps(data_dict[key][i]).encode("utf-8") + ).hexdigest() + + return data_dict + + +def _expand_multi_instruction(data_dict: dict) -> dict: + """ + Expands tests with input dictionaries that contain dictionaries. + + Args: + data_dict (dict): The yaml data. + + Returns: + dict: The expanded yaml data. + """ + for module_key in data_dict.keys(): + test_list = data_dict[module_key] + for test in test_list: + test_input = test["input"] + for case, potential_subcase in test_input.items(): + if isinstance(potential_subcase, dict): + for key, value in potential_subcase.items(): + new_case = test.copy() + new_case["case"] = "_".join([test["case"], key]) + new_case["input"][case] = value + test_list.append(new_case) + test_list.remove(test) + data_dict[module_key] = test_list + + return data_dict + + def _get_private_key_from_env_variable() -> rsa.PrivateKey: """Get the private key from an environment variable. From 362ced0cdb831974cfcabad15a10dd0973e18982 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Sat, 3 Feb 2024 16:37:32 +0100 Subject: [PATCH 20/32] separate hash from subtask and skip based .. on only model name and hash --- benchmark/benchmark_utils.py | 59 +++++++++++++------- benchmark/conftest.py | 11 +++- benchmark/test_biocypher_query_generation.py | 48 ++++++++++------ benchmark/test_rag_interpretation.py | 48 ++++++++-------- 4 files changed, 100 insertions(+), 66 deletions(-) diff --git a/benchmark/benchmark_utils.py b/benchmark/benchmark_utils.py index f8dd1514..bdc97eff 100644 --- a/benchmark/benchmark_utils.py +++ b/benchmark/benchmark_utils.py @@ -6,7 +6,7 @@ def benchmark_already_executed( model_name: str, task: str, - subtask: str, + md5_hash: str, ) -> bool: """ @@ -18,26 +18,35 @@ def benchmark_already_executed( task (str): The benchmark task, e.g. "biocypher_query_generation" - subtask (str): The benchmark subtask test case, e.g., - "72434e7a340a3f6dd047b944988491b7_single_word". It is composed of - the md5 hash of the test case and the test case purpose. + md5_hash (str): The md5 hash of the test case, e.g., + "72434e7a340a3f6dd047b944988491b7". It is created from the + dictionary representation of the test case. Returns: - bool: True if the benchmark task and subtask for the model_name has - already been run, False otherwise + bool: True if the benchmark case for the model_name has already been + run, False otherwise """ task_results = return_or_create_result_file(task) - task_results_subset = (task_results["model_name"] == model_name) & ( - task_results["subtask"] == subtask + + if task_results.empty: + return False + + run = ( + task_results[ + (task_results["model_name"] == model_name) + & (task_results["md5_hash"] == md5_hash) + ].shape[0] + > 0 ) - return task_results_subset.any() + + return run def skip_if_already_run( model_name: str, task: str, - subtask: str, + md5_hash: str, ) -> None: """Helper function to check if the test case is already executed. @@ -46,13 +55,13 @@ def skip_if_already_run( task (str): The benchmark task, e.g. "biocypher_query_generation" - subtask (str): The benchmark subtask test case, e.g., - "72434e7a340a3f6dd047b944988491b7_single_word". It is composed of - the md5 hash of the test case and the test case purpose. + md5_hash (str): The md5 hash of the test case, e.g., + "72434e7a340a3f6dd047b944988491b7". It is created from the + dictionary representation of the test case. """ - if benchmark_already_executed(model_name, task, subtask): + if benchmark_already_executed(model_name, task, md5_hash): pytest.skip( - f"benchmark {task}: {subtask} with {model_name} already executed" + f"Benchmark for {task} with hash {md5_hash} with {model_name} already executed" ) @@ -73,26 +82,34 @@ def return_or_create_result_file( results = pd.read_csv(file_path, header=0) except (pd.errors.EmptyDataError, FileNotFoundError): results = pd.DataFrame( - columns=["model_name", "subtask", "score", "iterations"] + columns=["model_name", "subtask", "score", "iterations", "md5_hash"] ) results.to_csv(file_path, index=False) return results def write_results_to_file( - model_name: str, subtask: str, score: str, iterations: str, file_path: str + model_name: str, + subtask: str, + score: str, + iterations: str, + md5_hash: str, + file_path: str, ): """Writes the benchmark results for the subtask to the result file. Args: model_name (str): The model name, e.g. "gpt-3.5-turbo" - subtask (str): The benchmark subtask test case, e.g. "entities_0" - score (str): The benchmark score, e.g. "1/1" - iterations (str): The number of iterations, e.g. "1" + subtask (str): The benchmark subtask test case, e.g. "entities" + score (str): The benchmark score, e.g. "5" + iterations (str): The number of iterations, e.g. "7" + md5_hash (str): The md5 hash of the test case + file_path (str): The path to the result file """ results = pd.read_csv(file_path, header=0) new_row = pd.DataFrame( - [[model_name, subtask, score, iterations]], columns=results.columns + [[model_name, subtask, score, iterations, md5_hash]], + columns=results.columns, ) results = pd.concat([results, new_row], ignore_index=True).sort_values( by=["model_name", "subtask"] diff --git a/benchmark/conftest.py b/benchmark/conftest.py index 39c60bc5..7e8522f9 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -269,19 +269,26 @@ def result_files(): result_file = pd.read_csv(file, header=0) except (pd.errors.EmptyDataError, FileNotFoundError): result_file = pd.DataFrame( - columns=["model_name", "subtask", "score", "iterations"] + columns=[ + "model_name", + "subtask", + "score", + "iterations", + "md5_hash", + ] ) result_file.to_csv(file, index=False) if not np.array_equal( result_file.columns, - ["model_name", "subtask", "score", "iterations"], + ["model_name", "subtask", "score", "iterations", "md5_hash"], ): result_file.columns = [ "model_name", "subtask", "score", "iterations", + "md5_hash", ] result_files[file] = result_file diff --git a/benchmark/test_biocypher_query_generation.py b/benchmark/test_biocypher_query_generation.py index 66204c60..35099ea8 100644 --- a/benchmark/test_biocypher_query_generation.py +++ b/benchmark/test_biocypher_query_generation.py @@ -39,8 +39,9 @@ def test_entity_selection( ): yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" - skip_if_already_run(model_name=model_name, task=task, subtask=subtask) + skip_if_already_run( + model_name=model_name, task=task, md5_hash=yaml_data["hash"] + ) prompt_engine = get_prompt_engine( yaml_data["input"]["kg_path"], prompt_engine ) @@ -62,9 +63,10 @@ def run_test(): write_results_to_file( prompt_engine.model_name, - subtask, + yaml_data["case"], f"{mean_score}/{max}", f"{n_iterations}", + yaml_data["hash"], get_result_file_path(task), ) @@ -78,8 +80,9 @@ def test_relationship_selection( ): yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" - skip_if_already_run(model_name=model_name, task=task, subtask=subtask) + skip_if_already_run( + model_name=model_name, task=task, md5_hash=yaml_data["hash"] + ) prompt_engine = get_prompt_engine( yaml_data["input"]["kg_path"], prompt_engine ) @@ -121,9 +124,10 @@ def run_test(): write_results_to_file( prompt_engine.model_name, - subtask, + yaml_data["case"], f"{mean_score}/{max}", f"{n_iterations}", + yaml_data["hash"], get_result_file_path(task), ) @@ -137,8 +141,9 @@ def test_property_selection( ): yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" - skip_if_already_run(model_name=model_name, task=task, subtask=subtask) + skip_if_already_run( + model_name=model_name, task=task, md5_hash=yaml_data["hash"] + ) prompt_engine = get_prompt_engine( yaml_data["input"]["kg_path"], prompt_engine ) @@ -187,9 +192,10 @@ def run_test(): write_results_to_file( prompt_engine.model_name, - subtask, + yaml_data["case"], f"{mean_score}/{max}", f"{n_iterations}", + yaml_data["hash"], get_result_file_path(task), ) @@ -203,8 +209,9 @@ def test_query_generation( ): yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" - skip_if_already_run(model_name=model_name, task=task, subtask=subtask) + skip_if_already_run( + model_name=model_name, task=task, md5_hash=yaml_data["hash"] + ) prompt_engine = get_prompt_engine( yaml_data["input"]["kg_path"], prompt_engine ) @@ -237,9 +244,10 @@ def run_test(): write_results_to_file( prompt_engine.model_name, - subtask, + yaml_data["case"], f"{mean_score}/{max}", f"{n_iterations}", + yaml_data["hash"], get_result_file_path(task), ) @@ -253,8 +261,9 @@ def test_end_to_end_query_generation( ): yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" - skip_if_already_run(model_name=model_name, task=task, subtask=subtask) + skip_if_already_run( + model_name=model_name, task=task, md5_hash=yaml_data["hash"] + ) prompt_engine = get_prompt_engine( yaml_data["input"]["kg_path"], prompt_engine ) @@ -288,9 +297,10 @@ def run_test(): write_results_to_file( prompt_engine.model_name, - subtask, + yaml_data["case"], f"{mean_score}/{max}", f"{n_iterations}", + yaml_data["hash"], get_result_file_path(task), ) @@ -387,8 +397,9 @@ def test_property_exists( ): yaml_data = test_data_biocypher_query_generation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(yaml_data['hash'])}_{yaml_data['test_case_purpose']}" - skip_if_already_run(model_name=model_name, task=task, subtask=subtask) + skip_if_already_run( + model_name=model_name, task=task, md5_hash=yaml_data["hash"] + ) prompt_engine = get_prompt_engine( yaml_data["input"]["kg_path"], prompt_engine ) @@ -440,8 +451,9 @@ def run_test(): write_results_to_file( prompt_engine.model_name, - subtask, + yaml_data["case"], f"{mean_score}/{max}", f"{n_iterations}", + yaml_data["hash"], get_result_file_path(task), ) diff --git a/benchmark/test_rag_interpretation.py b/benchmark/test_rag_interpretation.py index 136e7446..fcb076d5 100644 --- a/benchmark/test_rag_interpretation.py +++ b/benchmark/test_rag_interpretation.py @@ -17,23 +17,23 @@ def test_explicit_relevance_of_single_fragments( conversation, multiple_testing, ): + yaml_data = test_data_rag_interpretation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(test_data_rag_interpretation['hash'])}_{test_data_rag_interpretation['test_case_purpose']}" - if "explicit" not in test_data_rag_interpretation["test_case_purpose"]: + skip_if_already_run( + model_name=model_name, task=task, md5_hash=yaml_data["hash"] + ) + if "explicit" not in yaml_data["case"]: pytest.skip( - f"test case {test_data_rag_interpretation['test_case_purpose']} not supported for {subtask} benchmark" + f"test case {yaml_data['case']} not supported for {task} benchmark" ) - skip_if_already_run(model_name=model_name, task=task, subtask=subtask) def run_test(): conversation.reset() # needs to be reset for each test [ conversation.append_system_message(m) - for m in test_data_rag_interpretation["input"]["system_messages"] + for m in yaml_data["input"]["system_messages"] ] - response, _, _ = conversation.query( - test_data_rag_interpretation["input"]["prompt"] - ) + response, _, _ = conversation.query(yaml_data["input"]["prompt"]) # lower case, remove punctuation response = ( @@ -42,9 +42,7 @@ def run_test(): score = [] - score.append( - response == test_data_rag_interpretation["expected"]["answer"] - ) + score.append(response == yaml_data["expected"]["answer"]) return calculate_test_score(score) @@ -52,9 +50,10 @@ def run_test(): write_results_to_file( model_name, - subtask, + yaml_data["case"], f"{mean_score}/{max}", f"{n_iterations}", + yaml_data["hash"], get_result_file_path(task), ) @@ -66,27 +65,27 @@ def test_implicit_relevance_of_multiple_fragments( evaluation_conversation, multiple_testing, ): + yaml_data = test_data_rag_interpretation task = f"{inspect.currentframe().f_code.co_name.replace('test_', '')}" - subtask = f"{str(test_data_rag_interpretation['hash'])}_{test_data_rag_interpretation['test_case_purpose']}" - if "implicit" not in test_data_rag_interpretation["test_case_purpose"]: + skip_if_already_run( + model_name=model_name, task=task, md5_hash=yaml_data["hash"] + ) + if "implicit" not in yaml_data["case"]: pytest.skip( - f"test case {test_data_rag_interpretation['test_case_purpose']} not supported for {subtask} benchmark" + f"test case {yaml_data['case']} not supported for {task} benchmark" ) - skip_if_already_run(model_name=model_name, task=task, subtask=subtask) def run_test(): conversation.reset() # needs to be reset for each test [ conversation.append_system_message(m) - for m in test_data_rag_interpretation["input"]["system_messages"] + for m in yaml_data["input"]["system_messages"] ] - response, _, _ = conversation.query( - test_data_rag_interpretation["input"]["prompt"] - ) + response, _, _ = conversation.query(yaml_data["input"]["prompt"]) msg = ( "You will receive a statement as an answer to this question: " - f"{test_data_rag_interpretation['input']['prompt']} " + f"{yaml_data['input']['prompt']} " "If the statement is an answer to the question, please type 'answer'. " "If the statement declines to answer to the question or apologises, giving the reason of lack of relevance of the given text fragments, please type 'decline'. " "Do not type anything except these two options. Here is the statement: " @@ -103,9 +102,7 @@ def run_test(): ).strip() score = ( - [True] - if eval == test_data_rag_interpretation["expected"]["behaviour"] - else [False] + [True] if eval == yaml_data["expected"]["behaviour"] else [False] ) return calculate_test_score(score) @@ -114,8 +111,9 @@ def run_test(): write_results_to_file( model_name, - subtask, + yaml_data["case"], f"{mean_score}/{max}", f"{n_iterations}", + yaml_data["hash"], get_result_file_path(task), ) From 2318dfa1d762e8635e86d5a60afe7c04aea2db6f Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Sat, 3 Feb 2024 19:39:59 +0100 Subject: [PATCH 21/32] run benchmark --- benchmark/data/benchmark_data.csv | 8 - .../results/end_to_end_query_generation.csv | 109 ++++--- benchmark/results/entity_selection.csv | 110 ++++--- ...explicit_relevance_of_single_fragments.csv | 270 ++++++++++-------- ...plicit_relevance_of_multiple_fragments.csv | 83 ++++-- .../end_to_end_query_generation.csv | 7 +- .../entity_selection.csv | 53 ++-- ...explicit_relevance_of_single_fragments.csv | 51 ++-- ...plicit_relevance_of_multiple_fragments.csv | 51 ++-- .../overview-aggregated.csv | 43 ++- .../preprocessed_for_frontend/overview.csv | 43 ++- .../property_exists.csv | 21 +- .../property_selection.csv | 7 +- .../query_generation.csv | 27 +- .../relationship_selection.csv | 15 +- benchmark/results/property_exists.csv | 109 ++++--- benchmark/results/property_selection.csv | 109 ++++--- benchmark/results/query_generation.csv | 109 ++++--- benchmark/results/relationship_selection.csv | 110 ++++--- 19 files changed, 682 insertions(+), 653 deletions(-) delete mode 100644 benchmark/data/benchmark_data.csv diff --git a/benchmark/data/benchmark_data.csv b/benchmark/data/benchmark_data.csv deleted file mode 100644 index ad1f7b5d..00000000 --- a/benchmark/data/benchmark_data.csv +++ /dev/null @@ -1,8 +0,0 @@ -test_type;kg_path;prompt;entities;relationships;relationship_labels;properties;parts_of_query;test_case_purpose;system_messages -biocypher_query_generation;test_schema_info.yaml;Which genes are associated with mucoviscidosis?;["Gene", "Disease"];["GeneToPhenotypeAssociation"];{'PERTURBED': {'source': 'Disease', 'target': ['Protein', 'Gene']}};{'Disease': {'name': {}, 'ICD10': {}, 'DSM5': {}}, 'Gene': {'name': True}, 'GeneToPhenotypeAssociation': {'score': True, 'source': True, 'evidence': True}};"[""MATCH"", ""RETURN"", ""Gene"", ""Disease"", ""mucoviscidosis"", ""MATCH \([a-zA-Z]*:Gene\)<-\[[a-zA-Z]*:PERTURBED\]-\([a-zA-Z]*:Disease.*\)|MATCH \([a-zA-Z]*:Disease.*\)-\[[a-zA-Z]*:PERTURBED\]->\([a-zA-Z]*:Gene\)"", (""WHERE"", ""{name:"")]";single_word; -biocypher_query_generation;test_schema_info.yaml;Which genes are expressed in fibroblasts?;"[""Gene"", ""CellType""]";"[""GeneExpressedInCellType""]";{'GENE_EXPRESSED_IN_CELL_TYPE': {'source': 'Gene', 'target': ['CellType']}};{'CellType': {'cell_type_name': {}},'Gene': ['id', 'name'], 'GeneExpressedInCellType': ['expression_level']};"[""MATCH"", ""RETURN"", ""Gene"", ""CellType"", ""fibroblast"", ""MATCH \([a-zA-Z]*:Gene\)-\[[a-zA-Z]*:GENE_EXPRESSED_IN_CELL_TYPE\]->\([a-zA-Z]*:CellType.*|MATCH \([a-zA-Z]*:CellType.*<-\[[a-zA-Z]*:GENE_EXPRESSED_IN_CELL_TYPE\]-\([a-zA-Z]*:Gene\)"", (""WHERE"", ""{name:"")]";multi_word; -rag_interpretation;;Which molecular pathways are associated with cancer?;["no"];;;;;explicit;["You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'. Here is the fragment: ", "The earth is a globe."] -rag_interpretation;;Which molecular pathways are associated with cancer?;["yes"];;;;;explicit;["You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'. Here is the fragment: ", "TP53 is important in the regulation of cellular death.", "The EGFR pathway is deregulated in a number of cancers."] -rag_interpretation;;Which molecular pathways are associated with cancer?;["yes"];;;;;explicit;["You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'. Here is the fragment: ", "The EGFR pathway is deregulated in a number of cancers."] -rag_interpretation;;Which molecular pathways are associated with cancer?;["no"];;;;;explicit;["You will receive a text fragment to help answer the user's question. Your task is to judge these text fragments for relevance to the user's question, and return either 'yes' or 'no'. Here is the fragment: ", "The Human is the most endurant mammal."] -rag_interpretation;;Which molecular pathways are associated with cancer?;;;;;;implicit;["You will receive a number of text fragments to help answer the user's question. Your task is to use the information in these text fragments for answering the user's question, if they are relevant. Here are the fragments: ", "The earth is a globe.", "The Human is the most endurant mammal.", "America was discovered by Columbus.", "The wavelength of red light is longer than that of blue light."] \ No newline at end of file diff --git a/benchmark/results/end_to_end_query_generation.csv b/benchmark/results/end_to_end_query_generation.csv index d5c9fc36..d9ec6dff 100644 --- a/benchmark/results/end_to_end_query_generation.csv +++ b/benchmark/results/end_to_end_query_generation.csv @@ -1,56 +1,53 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,6.0/8,2 -gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,5.0/8,2 -gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,5.5/8,2 -gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/8,2 +model_name,subtask,score,iterations,md5_hash +gpt-3.5-turbo,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +gpt-3.5-turbo,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q3_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_1,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_1,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_S,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_S,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q6_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q6_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q8_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q8_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q3_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q3_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_1,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_1,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_S,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_S,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q6_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q6_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q8_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q8_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec diff --git a/benchmark/results/entity_selection.csv b/benchmark/results/entity_selection.csv index 7e81a7dc..86385748 100644 --- a/benchmark/results/entity_selection.csv +++ b/benchmark/results/entity_selection.csv @@ -1,57 +1,53 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,2.0/2,2 -gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,2.0/2,2 -gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,2.0/2,2 -gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,2.0/2,2 -llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q2_K,fe1d6c90419df7a4879f1db6bf2a6699_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,2.0/2,2 -llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/2,2 +model_name,subtask,score,iterations,md5_hash +gpt-3.5-turbo,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +gpt-3.5-turbo,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_0,single_word,1.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_1,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_S,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q6_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q8_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q2_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q2_K,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q3_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q3_K_M,single_word,1.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_0,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_1,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_1,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_M,single_word,1.5/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_S,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_S,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_0,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_K_M,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q6_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q8_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q8_0,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv index 6f4aa3ae..49b5ad8e 100644 --- a/benchmark/results/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -1,113 +1,157 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,2181d4e6c5cb2e08c440ea1fb1e656b1_explicit_relevance_yes,1.0/1,2 -gpt-3.5-turbo,3e47253bf5b263d4776c5ff16fc02f12_explicit_evaluation_yes,1.0/1,2 -gpt-3.5-turbo,4d2b9088df3dac7705e3e0ea4f774a55_explicit_evaluation_no,1.0/1,2 -gpt-3.5-turbo,8eaeeb846a23455661163a6aff503bd9_explicit_relevance_no,1.0/1,2 -gpt-3.5-turbo,c62b234f4d4e8841b405d3389a32cbcf_explicit_relevance_yes,1.0/1,2 -gpt-3.5-turbo,eeb3f60d196c661902d99c37b403def0_explicit_relevance_no,1.0/1,2 -gpt-4,2181d4e6c5cb2e08c440ea1fb1e656b1_explicit_relevance_yes,1.0/1,2 -gpt-4,3e47253bf5b263d4776c5ff16fc02f12_explicit_evaluation_yes,1.0/1,2 -gpt-4,4d2b9088df3dac7705e3e0ea4f774a55_explicit_evaluation_no,1.0/1,2 -gpt-4,8eaeeb846a23455661163a6aff503bd9_explicit_relevance_no,1.0/1,2 -gpt-4,c62b234f4d4e8841b405d3389a32cbcf_explicit_relevance_yes,1.0/1,2 -gpt-4,eeb3f60d196c661902d99c37b403def0_explicit_relevance_no,1.0/1,2 -llama-2-chat:13:ggmlv3:q2_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q2_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q2_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q2_K,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q3_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q3_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q3_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q3_K_M,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_1,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_1,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_1,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_1,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_M,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_S,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_S,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_S,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_S,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_K_M,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q6_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q6_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q6_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q6_K,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q8_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q8_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q8_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q8_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q2_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q2_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q2_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q2_K,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q3_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q3_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q3_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q3_K_M,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_1,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_1,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_1,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_1,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_K_M,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_K_S,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_K_S,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_K_S,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q4_K_S,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_K_M,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_K_M,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_K_M,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q6_K,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q6_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q6_K,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q6_K,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q8_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q8_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q8_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -llama-2-chat:7:ggmlv3:q8_0,dc574c3e08323cead7fd99076482b001_explicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,3208afe06efed369103692065713f060_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,549b1353372632a891705bb0e621e091_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,3208afe06efed369103692065713f060_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,549b1353372632a891705bb0e621e091_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,549b1353372632a891705bb0e621e091_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,3208afe06efed369103692065713f060_explicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,391baab91d5acb3d7ed91e73c2b4144a_explicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,549b1353372632a891705bb0e621e091_explicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,dc574c3e08323cead7fd99076482b001_explicit,0.0/1,2 +model_name,subtask,score,iterations,md5_hash +gpt-3.5-turbo,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +gpt-3.5-turbo,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +gpt-3.5-turbo,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +gpt-3.5-turbo,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +gpt-3.5-turbo,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +gpt-3.5-turbo,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggmlv3:q2_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggmlv3:q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggmlv3:q2_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggmlv3:q2_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggmlv3:q2_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggmlv3:q2_K,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggmlv3:q3_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggmlv3:q3_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggmlv3:q3_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggmlv3:q3_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggmlv3:q3_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggmlv3:q3_K_M,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggmlv3:q4_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggmlv3:q4_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggmlv3:q4_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggmlv3:q4_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggmlv3:q4_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggmlv3:q4_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggmlv3:q4_1,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggmlv3:q4_1,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggmlv3:q4_1,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggmlv3:q4_1,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggmlv3:q4_1,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggmlv3:q4_1,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggmlv3:q4_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggmlv3:q4_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggmlv3:q4_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggmlv3:q4_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggmlv3:q4_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggmlv3:q4_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggmlv3:q4_K_S,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggmlv3:q4_K_S,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggmlv3:q4_K_S,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggmlv3:q4_K_S,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggmlv3:q4_K_S,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggmlv3:q4_K_S,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggmlv3:q5_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggmlv3:q5_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggmlv3:q5_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggmlv3:q5_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggmlv3:q5_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggmlv3:q5_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggmlv3:q5_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggmlv3:q5_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggmlv3:q5_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggmlv3:q5_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggmlv3:q5_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggmlv3:q5_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggmlv3:q6_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggmlv3:q6_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggmlv3:q6_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggmlv3:q6_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggmlv3:q6_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggmlv3:q6_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggmlv3:q8_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggmlv3:q8_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggmlv3:q8_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggmlv3:q8_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggmlv3:q8_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggmlv3:q8_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggmlv3:q2_K,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggmlv3:q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggmlv3:q2_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggmlv3:q2_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggmlv3:q2_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggmlv3:q2_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggmlv3:q3_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggmlv3:q3_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggmlv3:q3_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggmlv3:q3_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggmlv3:q3_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggmlv3:q3_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggmlv3:q4_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggmlv3:q4_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggmlv3:q4_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggmlv3:q4_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggmlv3:q4_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggmlv3:q4_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggmlv3:q4_1,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggmlv3:q4_1,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggmlv3:q4_1,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggmlv3:q4_1,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggmlv3:q4_1,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggmlv3:q4_1,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggmlv3:q4_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggmlv3:q4_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggmlv3:q4_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggmlv3:q4_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggmlv3:q4_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggmlv3:q4_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggmlv3:q4_K_S,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggmlv3:q4_K_S,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggmlv3:q4_K_S,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggmlv3:q4_K_S,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggmlv3:q4_K_S,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggmlv3:q4_K_S,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggmlv3:q5_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggmlv3:q5_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggmlv3:q5_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggmlv3:q5_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggmlv3:q5_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggmlv3:q5_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggmlv3:q5_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggmlv3:q5_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggmlv3:q5_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggmlv3:q5_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggmlv3:q5_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggmlv3:q5_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggmlv3:q6_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggmlv3:q6_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggmlv3:q6_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggmlv3:q6_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggmlv3:q6_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggmlv3:q6_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggmlv3:q8_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggmlv3:q8_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggmlv3:q8_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggmlv3:q8_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggmlv3:q8_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggmlv3:q8_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_relevance_no_more_explicit,0.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_relevance_no_repeat_instruction,0.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_relevance_no_simple,0.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,explicit_evaluation_yes,0.0/1,2,1773602eac8037fbea015069d5f15cd2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,explicit_relevance_no_simple,0.5/1,2,bf26b8241de3470cd9a406aea0992fb2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,explicit_evaluation_yes,0.0/1,2,1773602eac8037fbea015069d5f15cd2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,explicit_relevance_no_more_explicit,0.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,explicit_relevance_no_repeat_instruction,0.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,explicit_relevance_no_simple,0.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,explicit_evaluation_yes,0.0/1,2,1773602eac8037fbea015069d5f15cd2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,explicit_relevance_no_more_explicit,0.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,explicit_relevance_no_repeat_instruction,0.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,explicit_relevance_no_simple,0.5/1,2,bf26b8241de3470cd9a406aea0992fb2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,explicit_evaluation_no,0.5/1,2,d15e0094569f8df146459b50a781fc55 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,explicit_evaluation_yes,0.0/1,2,1773602eac8037fbea015069d5f15cd2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,explicit_relevance_no_more_explicit,0.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,explicit_relevance_no_repeat_instruction,0.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,explicit_relevance_no_simple,0.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv index 04368c1b..1d800940 100644 --- a/benchmark/results/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/implicit_relevance_of_multiple_fragments.csv @@ -1,30 +1,53 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,828dd99e7080411673c30e5b16d777da_implicit_relevance_no,1.0/1,2 -gpt-3.5-turbo,a4aecd18bfc529901926ce7da7cf0048_implicit_relevance_yes,1.0/1,2 -gpt-4,828dd99e7080411673c30e5b16d777da_implicit_relevance_no,1.0/1,2 -gpt-4,a4aecd18bfc529901926ce7da7cf0048_implicit_relevance_yes,1.0/1,2 -llama-2-chat:13:ggmlv3:q2_K,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 -llama-2-chat:13:ggmlv3:q3_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:13:ggmlv3:q4_1,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:13:ggmlv3:q4_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_S,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:13:ggmlv3:q5_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:13:ggmlv3:q5_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:13:ggmlv3:q6_K,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:13:ggmlv3:q8_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:7:ggmlv3:q2_K,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q3_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:7:ggmlv3:q4_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q4_1,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:7:ggmlv3:q4_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:7:ggmlv3:q4_K_S,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:7:ggmlv3:q5_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:7:ggmlv3:q6_K,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -llama-2-chat:7:ggmlv3:q8_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,8d30cde616beb73fc77b02b919c846c9_implicit,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,8d30cde616beb73fc77b02b919c846c9_implicit,0.5/1,2 +model_name,subtask,score,iterations,md5_hash +gpt-3.5-turbo,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +gpt-3.5-turbo,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggmlv3:q2_K,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggmlv3:q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggmlv3:q3_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggmlv3:q3_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggmlv3:q4_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggmlv3:q4_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggmlv3:q4_1,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggmlv3:q4_1,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggmlv3:q4_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggmlv3:q4_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggmlv3:q4_K_S,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggmlv3:q4_K_S,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggmlv3:q5_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggmlv3:q5_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggmlv3:q5_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggmlv3:q5_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggmlv3:q6_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggmlv3:q6_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggmlv3:q8_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggmlv3:q8_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggmlv3:q2_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggmlv3:q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggmlv3:q3_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggmlv3:q3_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggmlv3:q4_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggmlv3:q4_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggmlv3:q4_1,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggmlv3:q4_1,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggmlv3:q4_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggmlv3:q4_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggmlv3:q4_K_S,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggmlv3:q4_K_S,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggmlv3:q5_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggmlv3:q5_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggmlv3:q5_K_M,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggmlv3:q5_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggmlv3:q6_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggmlv3:q6_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggmlv3:q8_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggmlv3:q8_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,implicit_relevance_yes,0.5/1,2,f9d749647929fcb55321c614a3bf8d20 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 diff --git a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv index 5886ed55..c2ab0bb7 100644 --- a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv @@ -1,7 +1,6 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-4,11.5,16.0,0.71875,2 -gpt-3.5-turbo,17.0,24.0,0.7083333333333334,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,16.0,0.0,2 +gpt-3.5-turbo,11.0,16.0,0.6875,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,16.0,0.0,2 @@ -13,6 +12,7 @@ llama-2-chat:7:ggmlv3:q5_0,0.0,16.0,0.0,2 llama-2-chat:7:ggmlv3:q4_K_S,0.0,16.0,0.0,2 llama-2-chat:7:ggmlv3:q4_K_M,0.0,16.0,0.0,2 llama-2-chat:7:ggmlv3:q4_1,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_0,0.0,16.0,0.0,2 llama-2-chat:7:ggmlv3:q3_K_M,0.0,16.0,0.0,2 llama-2-chat:7:ggmlv3:q2_K,0.0,16.0,0.0,2 llama-2-chat:13:ggmlv3:q8_0,0.0,16.0,0.0,2 @@ -24,5 +24,4 @@ llama-2-chat:13:ggmlv3:q4_K_M,0.0,16.0,0.0,2 llama-2-chat:13:ggmlv3:q4_1,0.0,16.0,0.0,2 llama-2-chat:13:ggmlv3:q4_0,0.0,16.0,0.0,2 llama-2-chat:13:ggmlv3:q3_K_M,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,16.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/entity_selection.csv b/benchmark/results/preprocessed_for_frontend/entity_selection.csv index 669d54f0..52482637 100644 --- a/benchmark/results/preprocessed_for_frontend/entity_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/entity_selection.csv @@ -1,28 +1,27 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-3.5-turbo,6.0,6.0,1.0,2 -gpt-4,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_K_M,3.0,4.0,0.75,2 -llama-2-chat:7:ggmlv3:q3_K_M,3.0,4.0,0.75,2 -llama-2-chat:7:ggmlv3:q4_1,2.0,4.0,0.5,2 -llama-2-chat:7:ggmlv3:q4_0,2.0,4.0,0.5,2 -llama-2-chat:7:ggmlv3:q8_0,2.0,4.0,0.5,2 -llama-2-chat:7:ggmlv3:q6_K,2.0,4.0,0.5,2 -llama-2-chat:7:ggmlv3:q5_K_M,2.0,4.0,0.5,2 -llama-2-chat:7:ggmlv3:q5_0,2.0,4.0,0.5,2 -llama-2-chat:7:ggmlv3:q4_K_S,2.0,4.0,0.5,2 -llama-2-chat:7:ggmlv3:q2_K,2.0,4.0,0.5,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.0,4.0,0.25,2 -llama-2-chat:13:ggmlv3:q4_0,1.0,4.0,0.25,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1.0,4.0,0.25,2 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_1,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q8_0,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q6_K,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_K_M,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_0,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_S,0.0,4.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,4.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,4.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,4.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_M,0.0,4.0,0.0,2 +gpt-3.5-turbo,4.0,4.0,1.0,2 +llama-2-chat:7:ggmlv3:q2_K,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q8_0,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q5_K_M,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q5_0,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q4_K_S,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q4_1,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q3_K_M,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q4_0,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q4_K_M,2.5,5.0,0.5,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,5.0,0.4,2 +llama-2-chat:13:ggmlv3:q4_0,1.0,5.0,0.2,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,1.0,5.0,0.2,2 +llama-2-chat:7:ggmlv3:q6_K,1.0,5.0,0.2,2 +llama-2-chat:13:ggmlv3:q6_K,0.0,5.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,5.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_1,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q8_0,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_M,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_K_S,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q5_0,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q5_K_M,0.0,5.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,5.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv index c4b93710..614a0acd 100644 --- a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv @@ -1,28 +1,27 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q2_K,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q8_0,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q6_K,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q5_K_M,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q5_0,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_K_S,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_K_M,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_1,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_0,4.0,4.0,1.0,2 -gpt-4,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q3_K_M,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q8_0,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q6_K,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q5_K_M,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q5_0,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q4_K_S,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q4_K_M,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q4_1,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q4_0,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q2_K,3.0,4.0,0.75,2 -llama-2-chat:13:ggmlv3:q3_K_M,2.0,4.0,0.5,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.0,4.0,0.5,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.0,4.0,0.5,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.0,4.0,0.25,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,4.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,4.0,0.0,2 +llama-2-chat:13:ggmlv3:q8_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q8_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q6_K,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q5_K_M,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q5_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q4_K_S,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q4_K_M,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q4_1,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q3_K_M,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q4_0,6.0,6.0,1.0,2 +llama-2-chat:13:ggmlv3:q6_K,6.0,6.0,1.0,2 +llama-2-chat:13:ggmlv3:q5_K_M,6.0,6.0,1.0,2 +llama-2-chat:13:ggmlv3:q5_0,6.0,6.0,1.0,2 +llama-2-chat:13:ggmlv3:q4_K_S,6.0,6.0,1.0,2 +llama-2-chat:13:ggmlv3:q4_K_M,6.0,6.0,1.0,2 +llama-2-chat:13:ggmlv3:q4_1,6.0,6.0,1.0,2 +llama-2-chat:13:ggmlv3:q4_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q2_K,5.0,6.0,0.8333333333333334,2 +llama-2-chat:13:ggmlv3:q2_K,5.0,6.0,0.8333333333333334,2 +llama-2-chat:13:ggmlv3:q3_K_M,5.0,6.0,0.8333333333333334,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.5,6.0,0.4166666666666667,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,2.0,6.0,0.3333333333333333,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,1.0,6.0,0.16666666666666666,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.5,6.0,0.08333333333333333,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.5,6.0,0.08333333333333333,2 diff --git a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv index 6c25aa98..63101439 100644 --- a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv @@ -1,28 +1,27 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,2.0,2.0,1.0,2 -gpt-4,2.0,2.0,1.0,2 -llama-2-chat:13:ggmlv3:q2_K,1.0,1.0,1.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,1.0,1.0,1.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1.0,1.0,1.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.0,1.0,1.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.5,1.0,0.5,2 -llama-2-chat:7:ggmlv3:q6_K,0.5,1.0,0.5,2 -llama-2-chat:7:ggmlv3:q5_K_M,0.5,1.0,0.5,2 -llama-2-chat:7:ggmlv3:q5_0,0.5,1.0,0.5,2 -llama-2-chat:7:ggmlv3:q4_K_M,0.5,1.0,0.5,2 -llama-2-chat:7:ggmlv3:q4_1,0.5,1.0,0.5,2 -llama-2-chat:7:ggmlv3:q3_K_M,0.5,1.0,0.5,2 -llama-2-chat:13:ggmlv3:q8_0,0.5,1.0,0.5,2 -llama-2-chat:13:ggmlv3:q6_K,0.5,1.0,0.5,2 -llama-2-chat:13:ggmlv3:q5_K_M,0.5,1.0,0.5,2 -llama-2-chat:13:ggmlv3:q5_0,0.5,1.0,0.5,2 -llama-2-chat:13:ggmlv3:q4_K_S,0.5,1.0,0.5,2 -llama-2-chat:13:ggmlv3:q4_1,0.5,1.0,0.5,2 -llama-2-chat:13:ggmlv3:q4_0,0.5,1.0,0.5,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.5,1.0,0.5,2 -llama-2-chat:7:ggmlv3:q2_K,0.0,1.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,1.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_K_S,0.0,1.0,0.0,2 -llama-2-chat:7:ggmlv3:q8_0,0.0,1.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_M,0.0,1.0,0.0,2 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,1.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,2.0,2.0,1.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,2.0,1.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.0,2.0,1.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.5,2.0,0.75,2 +llama-2-chat:7:ggmlv3:q5_K_M,1.5,2.0,0.75,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1.5,2.0,0.75,2 +llama-2-chat:13:ggmlv3:q4_K_S,1.5,2.0,0.75,2 +llama-2-chat:13:ggmlv3:q5_0,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q5_0,1.0,2.0,0.5,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.0,2.0,0.5,2 +llama-2-chat:13:ggmlv3:q3_K_M,1.0,2.0,0.5,2 +llama-2-chat:13:ggmlv3:q4_0,1.0,2.0,0.5,2 +llama-2-chat:13:ggmlv3:q4_1,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q8_0,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q6_K,1.0,2.0,0.5,2 +llama-2-chat:13:ggmlv3:q4_K_M,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q4_K_S,1.0,2.0,0.5,2 +llama-2-chat:13:ggmlv3:q5_K_M,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q4_K_M,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q4_1,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q3_K_M,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q2_K,1.0,2.0,0.5,2 +llama-2-chat:13:ggmlv3:q8_0,1.0,2.0,0.5,2 +llama-2-chat:13:ggmlv3:q6_K,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q4_0,1.0,2.0,0.5,2 diff --git a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv index 65db68b4..778a800d 100644 --- a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv +++ b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv @@ -1,28 +1,27 @@ Model name,Mean -gpt-4,0.9002757352941176 -gpt-3.5-turbo,0.8854166666666667 -llama-2-chat:7:ggmlv3:q2_K,0.5372242647058824 -llama-2-chat:7:ggmlv3:q3_K_M,0.53515625 -llama-2-chat:7:ggmlv3:q5_0,0.4739583333333333 +llama-2-chat:7:ggmlv3:q2_K,inf +gpt-3.5-turbo,0.8777573529411764 +llama-2-chat:7:ggmlv3:q3_K_M,0.51640625 +llama-2-chat:7:ggmlv3:q5_0,0.4864583333333333 +llama-2-chat:7:ggmlv3:q5_K_M,0.465625 llama-2-chat:13:ggmlv3:q5_0,0.4609375 -llama-2-chat:7:ggmlv3:q4_0,0.44140625 -llama-2-chat:7:ggmlv3:q4_K_M,0.43046875 +llama-2-chat:7:ggmlv3:q4_0,0.45 +llama-2-chat:7:ggmlv3:q4_K_S,0.4447916666666667 llama-2-chat:13:ggmlv3:q4_1,0.4296875 -llama-2-chat:7:ggmlv3:q5_K_M,0.42421875 -llama-2-chat:13:ggmlv3:q4_0,0.4140625 -llama-2-chat:7:ggmlv3:q6_K,0.403125 -llama-2-chat:7:ggmlv3:q4_1,0.403125 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.3984375 +llama-2-chat:7:ggmlv3:q4_1,0.415625 +llama-2-chat:13:ggmlv3:q4_0,0.4078125 +llama-2-chat:7:ggmlv3:q4_K_M,0.4075520833333333 +llama-2-chat:7:ggmlv3:q8_0,0.403125 llama-2-chat:13:ggmlv3:q5_K_M,0.3984375 +llama-2-chat:13:ggmlv3:q4_K_S,0.3802083333333333 llama-2-chat:13:ggmlv3:q6_K,0.3776041666666667 +llama-2-chat:13:ggmlv3:q4_K_M,0.3776041666666667 llama-2-chat:13:ggmlv3:q8_0,0.3776041666666667 -llama-2-chat:7:ggmlv3:q4_K_S,0.3697916666666667 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.359375 -llama-2-chat:13:ggmlv3:q4_K_S,0.3528645833333333 -llama-2-chat:7:ggmlv3:q8_0,0.34765625 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.3333333333333333 -llama-2-chat:13:ggmlv3:q2_K,0.33035714285714285 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.328125 -llama-2-chat:13:ggmlv3:q4_K_M,0.3151041666666667 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.3020833333333333 -llama-2-chat:13:ggmlv3:q3_K_M,0.23958333333333331 +llama-2-chat:7:ggmlv3:q6_K,0.35703125 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.35026041666666663 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.34895833333333337 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.34791666666666665 +llama-2-chat:13:ggmlv3:q2_K,0.34226190476190477 +llama-2-chat:13:ggmlv3:q3_K_M,0.3359375 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.33125 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.28125 diff --git a/benchmark/results/preprocessed_for_frontend/overview.csv b/benchmark/results/preprocessed_for_frontend/overview.csv index e33cccf2..0e0a7d3d 100644 --- a/benchmark/results/preprocessed_for_frontend/overview.csv +++ b/benchmark/results/preprocessed_for_frontend/overview.csv @@ -1,28 +1,27 @@ Model name,property_selection,query_generation,explicit_relevance_of_single_fragments,entity_selection,relationship_selection,end_to_end_query_generation,implicit_relevance_of_multiple_fragments,property_exists,Mean -gpt-4,0.7647058823529411,0.71875,1.0,1.0,1.0,0.71875,1.0,1.0,0.9002757352941176 -gpt-3.5-turbo,0.6666666666666666,0.7083333333333334,1.0,1.0,1.0,0.7083333333333334,1.0,1.0,0.8854166666666667 -llama-2-chat:7:ggmlv3:q2_K,0.2352941176470588,0.5625,1.0,0.5,0.0,0.0,0.0,2.0,0.5372242647058824 -llama-2-chat:7:ggmlv3:q3_K_M,0.0,0.65625,1.0,0.75,0.5,0.0,0.5,0.875,0.53515625 -llama-2-chat:7:ggmlv3:q5_0,0.0,0.625,1.0,0.5,0.5,0.0,0.5,0.6666666666666666,0.4739583333333333 +llama-2-chat:7:ggmlv3:q2_K,0.2352941176470588,0.4375,0.8333333333333334,0.6,0.0,0.0,0.5,inf,inf +gpt-3.5-turbo,0.6470588235294118,0.6875,1.0,1.0,1.0,0.6875,1.0,1.0,0.8777573529411764 +llama-2-chat:7:ggmlv3:q3_K_M,0.0,0.65625,1.0,0.6,0.5,0.0,0.5,0.875,0.51640625 +llama-2-chat:7:ggmlv3:q5_0,0.0,0.625,1.0,0.6,0.5,0.0,0.5,0.6666666666666666,0.4864583333333333 +llama-2-chat:7:ggmlv3:q5_K_M,0.0,0.625,1.0,0.6,0.0,0.0,0.75,0.75,0.465625 llama-2-chat:13:ggmlv3:q5_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.5,0.4609375 -llama-2-chat:7:ggmlv3:q4_0,0.0,0.53125,1.0,0.5,0.0,0.0,0.0,1.5,0.44140625 -llama-2-chat:7:ggmlv3:q4_K_M,0.0,0.59375,1.0,0.75,0.0,0.0,0.5,0.6,0.43046875 +llama-2-chat:7:ggmlv3:q4_0,0.0,0.5,1.0,0.6,0.0,0.0,0.5,1.0,0.45 +llama-2-chat:7:ggmlv3:q4_K_S,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.8333333333333334,0.4447916666666667 llama-2-chat:13:ggmlv3:q4_1,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.25,0.4296875 -llama-2-chat:7:ggmlv3:q5_K_M,0.0,0.59375,1.0,0.5,0.0,0.0,0.5,0.8,0.42421875 -llama-2-chat:13:ggmlv3:q4_0,0.0,0.5625,1.0,0.25,0.0,0.0,0.5,1.0,0.4140625 -llama-2-chat:7:ggmlv3:q6_K,0.0,0.625,1.0,0.5,0.0,0.0,0.5,0.6,0.403125 -llama-2-chat:7:ggmlv3:q4_1,0.0,0.625,1.0,0.5,0.0,0.0,0.5,0.6,0.403125 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,0.6875,0.0,0.0,0.0,0.0,1.0,1.5,0.3984375 +llama-2-chat:7:ggmlv3:q4_1,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.6,0.415625 +llama-2-chat:13:ggmlv3:q4_0,0.0,0.5625,1.0,0.2,0.0,0.0,0.5,1.0,0.4078125 +llama-2-chat:7:ggmlv3:q4_K_M,0.0,0.59375,1.0,0.5,0.0,0.0,0.5,0.6666666666666666,0.4075520833333333 +llama-2-chat:7:ggmlv3:q8_0,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.5,0.403125 llama-2-chat:13:ggmlv3:q5_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.0,0.3984375 +llama-2-chat:13:ggmlv3:q4_K_S,0.0,0.625,1.0,0.0,0.0,0.0,0.75,0.6666666666666666,0.3802083333333333 llama-2-chat:13:ggmlv3:q6_K,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667 +llama-2-chat:13:ggmlv3:q4_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667 llama-2-chat:13:ggmlv3:q8_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667 -llama-2-chat:7:ggmlv3:q4_K_S,0.0,0.625,1.0,0.5,0.0,0.0,0.0,0.8333333333333334,0.3697916666666667 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,0.75,0.5,0.0,0.0,0.0,1.0,0.625,0.359375 -llama-2-chat:13:ggmlv3:q4_K_S,0.0,0.65625,1.0,0.0,0.0,0.0,0.5,0.6666666666666666,0.3528645833333333 -llama-2-chat:7:ggmlv3:q8_0,0.0,0.65625,1.0,0.5,0.0,0.0,0.0,0.625,0.34765625 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,0.75,0.25,0.25,0.25,0.0,0.5,0.6666666666666666,0.3333333333333333 -llama-2-chat:13:ggmlv3:q2_K,0.0,0.5625,0.75,0.0,0.0,0.0,1.0,,0.33035714285714285 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,0.75,0.5,0.25,0.0,0.0,0.5,0.625,0.328125 -llama-2-chat:13:ggmlv3:q4_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.0,0.8333333333333334,0.3151041666666667 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,0.75,0.0,0.0,0.0,0.0,1.0,0.6666666666666666,0.3020833333333333 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,0.75,0.5,0.0,0.0,0.0,0.0,0.6666666666666666,0.23958333333333331 +llama-2-chat:7:ggmlv3:q6_K,0.0,0.65625,1.0,0.2,0.0,0.0,0.5,0.5,0.35703125 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,0.71875,0.3333333333333333,0.0,0.0,0.0,0.75,1.0,0.35026041666666663 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,0.75,0.4166666666666667,0.0,0.0,0.0,1.0,0.625,0.34895833333333337 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,0.75,0.1666666666666666,0.2,0.0,0.0,1.0,0.6666666666666666,0.34791666666666665 +llama-2-chat:13:ggmlv3:q2_K,0.0,0.5625,0.8333333333333334,0.0,0.0,0.0,1.0,,0.34226190476190477 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,0.6875,0.8333333333333334,0.0,0.0,0.0,0.5,0.6666666666666666,0.3359375 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,0.75,0.0833333333333333,0.4,0.25,0.0,0.5,0.6666666666666666,0.33125 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,0.75,0.0833333333333333,0.0,0.0,0.0,0.75,0.6666666666666666,0.28125 diff --git a/benchmark/results/preprocessed_for_frontend/property_exists.csv b/benchmark/results/preprocessed_for_frontend/property_exists.csv index 3051bfc2..c95820a8 100644 --- a/benchmark/results/preprocessed_for_frontend/property_exists.csv +++ b/benchmark/results/preprocessed_for_frontend/property_exists.csv @@ -1,28 +1,27 @@ Model name,Passed test cases,Total test cases,Score,Iterations -llama-2-chat:7:ggmlv3:q2_K,2.0,1.0,2.0,2 -llama-2-chat:7:ggmlv3:q4_0,3.0,2.0,1.5,2 +llama-2-chat:7:ggmlv3:q2_K,1.0,0.0,inf,2 llama-2-chat:13:ggmlv3:q5_0,1.5,1.0,1.5,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.5,1.0,1.5,2 llama-2-chat:13:ggmlv3:q4_1,2.5,2.0,1.25,2 -gpt-4,4.0,4.0,1.0,2 +gpt-3.5-turbo,4.0,4.0,1.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,2.0,2.0,1.0,2 llama-2-chat:13:ggmlv3:q5_K_M,3.0,3.0,1.0,2 -gpt-3.5-turbo,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q4_0,4.0,4.0,1.0,2 llama-2-chat:13:ggmlv3:q4_0,1.0,1.0,1.0,2 llama-2-chat:7:ggmlv3:q3_K_M,3.5,4.0,0.875,2 llama-2-chat:13:ggmlv3:q8_0,2.5,3.0,0.8333333333333334,2 llama-2-chat:13:ggmlv3:q4_K_M,2.5,3.0,0.8333333333333334,2 llama-2-chat:7:ggmlv3:q4_K_S,2.5,3.0,0.8333333333333334,2 llama-2-chat:13:ggmlv3:q6_K,2.5,3.0,0.8333333333333334,2 -llama-2-chat:7:ggmlv3:q5_K_M,4.0,5.0,0.8,2 +llama-2-chat:7:ggmlv3:q5_K_M,3.0,4.0,0.75,2 llama-2-chat:13:ggmlv3:q4_K_S,2.0,3.0,0.6666666666666666,2 -llama-2-chat:13:ggmlv3:q3_K_M,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,3.0,0.6666666666666666,2 llama-2-chat:7:ggmlv3:q5_0,2.0,3.0,0.6666666666666666,2 -llama-2-chat:7:ggmlv3:q8_0,2.5,4.0,0.625,2 +llama-2-chat:7:ggmlv3:q4_K_M,2.0,3.0,0.6666666666666666,2 +llama-2-chat:13:ggmlv3:q3_K_M,2.0,3.0,0.6666666666666666,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.5,4.0,0.625,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.5,4.0,0.625,2 -llama-2-chat:7:ggmlv3:q6_K,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q4_K_M,3.0,5.0,0.6,2 llama-2-chat:7:ggmlv3:q4_1,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q6_K,2.0,4.0,0.5,2 +llama-2-chat:7:ggmlv3:q8_0,2.5,5.0,0.5,2 llama-2-chat:13:ggmlv3:q2_K,0.0,0.0,,2 diff --git a/benchmark/results/preprocessed_for_frontend/property_selection.csv b/benchmark/results/preprocessed_for_frontend/property_selection.csv index 765c4336..6855dd0f 100644 --- a/benchmark/results/preprocessed_for_frontend/property_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/property_selection.csv @@ -1,6 +1,5 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-4,13.0,17.0,0.7647058823529411,2 -gpt-3.5-turbo,16.0,24.0,0.6666666666666666,2 +gpt-3.5-turbo,11.0,17.0,0.6470588235294118,2 llama-2-chat:7:ggmlv3:q2_K,4.0,17.0,0.23529411764705882,2 llama-2-chat:7:ggmlv3:q4_1,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,6.0,0.0,2 @@ -13,8 +12,9 @@ llama-2-chat:7:ggmlv3:q5_K_M,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q5_0,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q4_K_S,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q4_K_M,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q3_K_M,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q3_K_M,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q6_K,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q5_K_M,0.0,6.0,0.0,2 @@ -24,5 +24,4 @@ llama-2-chat:13:ggmlv3:q4_K_M,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q4_1,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q3_K_M,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/query_generation.csv b/benchmark/results/preprocessed_for_frontend/query_generation.csv index 975132fe..7b2c8126 100644 --- a/benchmark/results/preprocessed_for_frontend/query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/query_generation.csv @@ -1,28 +1,27 @@ Model name,Passed test cases,Total test cases,Score,Iterations mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,12.0,16.0,0.75,2 -llama-2-chat:13:ggmlv3:q3_K_M,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,12.0,16.0,0.75,2 -gpt-4,11.5,16.0,0.71875,2 -gpt-3.5-turbo,17.0,24.0,0.7083333333333334,2 -llama-2-chat:13:ggmlv3:q5_0,11.0,16.0,0.6875,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,11.5,16.0,0.71875,2 llama-2-chat:13:ggmlv3:q5_K_M,11.0,16.0,0.6875,2 -llama-2-chat:13:ggmlv3:q6_K,11.0,16.0,0.6875,2 llama-2-chat:13:ggmlv3:q8_0,11.0,16.0,0.6875,2 +llama-2-chat:13:ggmlv3:q6_K,11.0,16.0,0.6875,2 +gpt-3.5-turbo,11.0,16.0,0.6875,2 +llama-2-chat:13:ggmlv3:q5_0,11.0,16.0,0.6875,2 +llama-2-chat:13:ggmlv3:q3_K_M,11.0,16.0,0.6875,2 llama-2-chat:13:ggmlv3:q4_1,11.0,16.0,0.6875,2 llama-2-chat:13:ggmlv3:q4_K_M,11.0,16.0,0.6875,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,11.0,16.0,0.6875,2 -llama-2-chat:13:ggmlv3:q4_K_S,10.5,16.0,0.65625,2 -llama-2-chat:7:ggmlv3:q8_0,10.5,16.0,0.65625,2 llama-2-chat:7:ggmlv3:q3_K_M,10.5,16.0,0.65625,2 +llama-2-chat:7:ggmlv3:q6_K,10.5,16.0,0.65625,2 +llama-2-chat:7:ggmlv3:q5_K_M,10.0,16.0,0.625,2 +llama-2-chat:7:ggmlv3:q8_0,10.0,16.0,0.625,2 llama-2-chat:7:ggmlv3:q5_0,10.0,16.0,0.625,2 -llama-2-chat:7:ggmlv3:q6_K,10.0,16.0,0.625,2 -llama-2-chat:7:ggmlv3:q4_1,10.0,16.0,0.625,2 +llama-2-chat:13:ggmlv3:q4_K_S,10.0,16.0,0.625,2 llama-2-chat:7:ggmlv3:q4_K_S,10.0,16.0,0.625,2 +llama-2-chat:7:ggmlv3:q4_1,10.0,16.0,0.625,2 llama-2-chat:7:ggmlv3:q4_K_M,9.5,16.0,0.59375,2 -llama-2-chat:7:ggmlv3:q5_K_M,9.5,16.0,0.59375,2 -llama-2-chat:7:ggmlv3:q2_K,9.0,16.0,0.5625,2 -llama-2-chat:13:ggmlv3:q4_0,9.0,16.0,0.5625,2 llama-2-chat:13:ggmlv3:q2_K,9.0,16.0,0.5625,2 -llama-2-chat:7:ggmlv3:q4_0,8.5,16.0,0.53125,2 +llama-2-chat:13:ggmlv3:q4_0,9.0,16.0,0.5625,2 +llama-2-chat:7:ggmlv3:q4_0,8.0,16.0,0.5,2 +llama-2-chat:7:ggmlv3:q2_K,7.0,16.0,0.4375,2 diff --git a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv index aa7c53e8..0c1dbaeb 100644 --- a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv @@ -1,23 +1,19 @@ Model name,Passed test cases,Total test cases,Score,Iterations -gpt-3.5-turbo,9.0,9.0,1.0,2 -gpt-4,6.0,6.0,1.0,2 +gpt-3.5-turbo,6.0,6.0,1.0,2 llama-2-chat:7:ggmlv3:q5_0,3.0,6.0,0.5,2 llama-2-chat:7:ggmlv3:q3_K_M,3.0,6.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.5,6.0,0.25,2 -llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_1,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q8_0,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q6_K,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q5_K_M,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,9.0,0.0,2 llama-2-chat:7:ggmlv3:q4_K_S,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_1,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_1,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_K_M,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q2_K,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q6_K,0.0,6.0,0.0,2 @@ -25,4 +21,7 @@ llama-2-chat:13:ggmlv3:q5_K_M,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q5_0,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q4_K_S,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q4_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_1,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2 diff --git a/benchmark/results/property_exists.csv b/benchmark/results/property_exists.csv index ea12ff73..96671ec9 100644 --- a/benchmark/results/property_exists.csv +++ b/benchmark/results/property_exists.csv @@ -1,56 +1,53 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,2.0/2,2 -gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,2.0/2,2 -gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,2.0/2,2 -gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,2.0/2,2 -llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/0,2 -llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/0,2 -llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/0,2 -llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/1,2 -llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 -llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.5/0,2 -llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 -llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 -llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/0,2 -llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 -llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/2,2 -llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/0,2 -llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,2.0/3,2 -llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,2.0/3,2 -llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 -llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/2,2 -llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,2.5/3,2 -llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,2.0/3,2 -llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.5/0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,1.0/1,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.0/2,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,1.5/2,2 +model_name,subtask,score,iterations,md5_hash +gpt-3.5-turbo,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +gpt-3.5-turbo,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/0,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/0,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q3_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q3_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_0,multi_word,0.0/0,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_1,multi_word,1.5/1,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_1,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_M,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_S,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_S,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_0,multi_word,0.5/0,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q6_K,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q6_K,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q8_0,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q8_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q2_K,multi_word,1.0/0,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q2_K,single_word,0.0/0,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q3_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q3_K_M,single_word,1.5/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_0,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_0,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_1,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_1,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_S,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_S,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_K_M,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_K_M,single_word,1.5/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q6_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q6_K,single_word,1.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q8_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q8_0,single_word,1.5/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,1.0/1,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,1.5/2,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec diff --git a/benchmark/results/property_selection.csv b/benchmark/results/property_selection.csv index 691fd9e4..03475ee5 100644 --- a/benchmark/results/property_selection.csv +++ b/benchmark/results/property_selection.csv @@ -1,56 +1,53 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,5.0/7,2 -gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/7,2 -gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,6.0/10,2 -gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,7.0/7,2 -gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,6.0/10,2 -llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,2.0/7,2 -llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,2.0/10,2 -llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +model_name,subtask,score,iterations,md5_hash +gpt-3.5-turbo,multi_word,5.0/7,2,f29b6faf7d003159d43a5d1cf451587f +gpt-3.5-turbo,single_word,6.0/10,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q2_K,multi_word,2.0/7,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q2_K,single_word,2.0/10,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec diff --git a/benchmark/results/query_generation.csv b/benchmark/results/query_generation.csv index 064a87e6..c4c2a37b 100644 --- a/benchmark/results/query_generation.csv +++ b/benchmark/results/query_generation.csv @@ -1,56 +1,53 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,6.0/8,2 -gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,5.0/8,2 -gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,5.5/8,2 -gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 -llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 -llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 -llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 -llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 -llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,4.0/8,2 -llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,4.5/8,2 -llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.5/8,2 -llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,4.0/8,2 -llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,4.5/8,2 -llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.0/8,2 -llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,5.5/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,5.5/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,6.0/8,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,6.0/8,2 +model_name,subtask,score,iterations,md5_hash +gpt-3.5-turbo,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +gpt-3.5-turbo,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q2_K,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q2_K,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q3_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q3_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_0,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_1,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_1,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_S,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_S,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_0,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_0,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q6_K,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q6_K,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q8_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q8_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q2_K,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q2_K,single_word,3.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q3_K_M,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q3_K_M,single_word,5.5/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_0,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_0,single_word,4.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_1,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_1,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_M,multi_word,5.5/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_M,single_word,4.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_S,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_S,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_0,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_K_M,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q6_K,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q6_K,single_word,5.5/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q8_0,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q8_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,5.5/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec diff --git a/benchmark/results/relationship_selection.csv b/benchmark/results/relationship_selection.csv index fe5d0069..9d75eed5 100644 --- a/benchmark/results/relationship_selection.csv +++ b/benchmark/results/relationship_selection.csv @@ -1,57 +1,53 @@ -model_name,subtask,score,iterations -gpt-3.5-turbo,152cdbfe563d26cbcc2838f185e87ae2_multi_word,3.0/3,2 -gpt-3.5-turbo,18695dffc4ad2b31935a6a768ebe8f74_multi_word,3.0/3,2 -gpt-3.5-turbo,fe1d6c90419df7a4879f1db6bf2a6699_single_word,3.0/3,2 -gpt-4,152cdbfe563d26cbcc2838f185e87ae2_multi_word,3.0/3,2 -gpt-4,fe1d6c90419df7a4879f1db6bf2a6699_single_word,3.0/3,2 -llama-2-chat:13:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q2_K,fe1d6c90419df7a4879f1db6bf2a6699_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:13:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q3_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q3_K_M,72434e7a340a3f6dd047b944988491b7_single_word,3.0/3,2 -llama-2-chat:7:ggmlv3:q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_1,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_1,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_K_S,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q4_K_S,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_0,72434e7a340a3f6dd047b944988491b7_single_word,3.0/3,2 -llama-2-chat:7:ggmlv3:q5_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q5_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q6_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q6_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -llama-2-chat:7:ggmlv3:q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,1.5/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,18695dffc4ad2b31935a6a768ebe8f74_multi_word,0.0/3,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,72434e7a340a3f6dd047b944988491b7_single_word,0.0/3,2 +model_name,subtask,score,iterations,md5_hash +gpt-3.5-turbo,multi_word,3.0/3,2,f29b6faf7d003159d43a5d1cf451587f +gpt-3.5-turbo,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggmlv3:q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggmlv3:q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q3_K_M,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_0,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggmlv3:q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggmlv3:q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,1.5/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec From 562031e9852cb41ab852937dd51f7f172435fb42 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Sat, 3 Feb 2024 20:37:02 +0100 Subject: [PATCH 22/32] prevent inf values, split model name --- .../overview-aggregated.csv | 54 +++++++++---------- .../preprocessed_for_frontend/overview.csv | 54 +++++++++---------- .../property_exists.csv | 14 ++--- docs/scripts/hooks.py | 40 +++++++++++--- 4 files changed, 95 insertions(+), 67 deletions(-) diff --git a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv index 778a800d..8baec328 100644 --- a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv +++ b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv @@ -1,27 +1,27 @@ -Model name,Mean -llama-2-chat:7:ggmlv3:q2_K,inf -gpt-3.5-turbo,0.8777573529411764 -llama-2-chat:7:ggmlv3:q3_K_M,0.51640625 -llama-2-chat:7:ggmlv3:q5_0,0.4864583333333333 -llama-2-chat:7:ggmlv3:q5_K_M,0.465625 -llama-2-chat:13:ggmlv3:q5_0,0.4609375 -llama-2-chat:7:ggmlv3:q4_0,0.45 -llama-2-chat:7:ggmlv3:q4_K_S,0.4447916666666667 -llama-2-chat:13:ggmlv3:q4_1,0.4296875 -llama-2-chat:7:ggmlv3:q4_1,0.415625 -llama-2-chat:13:ggmlv3:q4_0,0.4078125 -llama-2-chat:7:ggmlv3:q4_K_M,0.4075520833333333 -llama-2-chat:7:ggmlv3:q8_0,0.403125 -llama-2-chat:13:ggmlv3:q5_K_M,0.3984375 -llama-2-chat:13:ggmlv3:q4_K_S,0.3802083333333333 -llama-2-chat:13:ggmlv3:q6_K,0.3776041666666667 -llama-2-chat:13:ggmlv3:q4_K_M,0.3776041666666667 -llama-2-chat:13:ggmlv3:q8_0,0.3776041666666667 -llama-2-chat:7:ggmlv3:q6_K,0.35703125 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.35026041666666663 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.34895833333333337 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.34791666666666665 -llama-2-chat:13:ggmlv3:q2_K,0.34226190476190477 -llama-2-chat:13:ggmlv3:q3_K_M,0.3359375 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.33125 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.28125 +Model name,Size,Quantisation,Mean +gpt-3.5-turbo,,,0.8777573529411764 +llama-2-chat,7,q3_K_M,0.51640625 +llama-2-chat,7,q5_0,0.4864583333333333 +llama-2-chat,7,q5_K_M,0.465625 +llama-2-chat,13,q5_0,0.4609375 +llama-2-chat,7,q4_0,0.45 +llama-2-chat,7,q4_K_S,0.4447916666666667 +llama-2-chat,13,q4_1,0.4296875 +llama-2-chat,7,q4_1,0.415625 +llama-2-chat,13,q4_0,0.4078125 +llama-2-chat,7,q4_K_M,0.4075520833333333 +llama-2-chat,7,q8_0,0.403125 +llama-2-chat,13,q5_K_M,0.3984375 +llama-2-chat,13,q4_K_S,0.3802083333333333 +llama-2-chat,13,q8_0,0.3776041666666667 +llama-2-chat,13,q4_K_M,0.3776041666666667 +llama-2-chat,13,q6_K,0.3776041666666667 +llama-2-chat,7,q6_K,0.35703125 +mixtral-instruct-v0.1,46_7,Q2_K,0.35026041666666663 +mixtral-instruct-v0.1,46_7,Q4_0,0.34895833333333337 +mixtral-instruct-v0.1,46_7,Q4_K_M,0.34791666666666665 +llama-2-chat,13,q3_K_M,0.3359375 +mixtral-instruct-v0.1,46_7,Q5_0,0.33125 +llama-2-chat,7,q2_K,0.325765931372549 +llama-2-chat,13,q2_K,0.2994791666666667 +mixtral-instruct-v0.1,46_7,Q8_0,0.28125 diff --git a/benchmark/results/preprocessed_for_frontend/overview.csv b/benchmark/results/preprocessed_for_frontend/overview.csv index 0e0a7d3d..88aa638c 100644 --- a/benchmark/results/preprocessed_for_frontend/overview.csv +++ b/benchmark/results/preprocessed_for_frontend/overview.csv @@ -1,27 +1,27 @@ -Model name,property_selection,query_generation,explicit_relevance_of_single_fragments,entity_selection,relationship_selection,end_to_end_query_generation,implicit_relevance_of_multiple_fragments,property_exists,Mean -llama-2-chat:7:ggmlv3:q2_K,0.2352941176470588,0.4375,0.8333333333333334,0.6,0.0,0.0,0.5,inf,inf -gpt-3.5-turbo,0.6470588235294118,0.6875,1.0,1.0,1.0,0.6875,1.0,1.0,0.8777573529411764 -llama-2-chat:7:ggmlv3:q3_K_M,0.0,0.65625,1.0,0.6,0.5,0.0,0.5,0.875,0.51640625 -llama-2-chat:7:ggmlv3:q5_0,0.0,0.625,1.0,0.6,0.5,0.0,0.5,0.6666666666666666,0.4864583333333333 -llama-2-chat:7:ggmlv3:q5_K_M,0.0,0.625,1.0,0.6,0.0,0.0,0.75,0.75,0.465625 -llama-2-chat:13:ggmlv3:q5_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.5,0.4609375 -llama-2-chat:7:ggmlv3:q4_0,0.0,0.5,1.0,0.6,0.0,0.0,0.5,1.0,0.45 -llama-2-chat:7:ggmlv3:q4_K_S,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.8333333333333334,0.4447916666666667 -llama-2-chat:13:ggmlv3:q4_1,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.25,0.4296875 -llama-2-chat:7:ggmlv3:q4_1,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.6,0.415625 -llama-2-chat:13:ggmlv3:q4_0,0.0,0.5625,1.0,0.2,0.0,0.0,0.5,1.0,0.4078125 -llama-2-chat:7:ggmlv3:q4_K_M,0.0,0.59375,1.0,0.5,0.0,0.0,0.5,0.6666666666666666,0.4075520833333333 -llama-2-chat:7:ggmlv3:q8_0,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.5,0.403125 -llama-2-chat:13:ggmlv3:q5_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.0,0.3984375 -llama-2-chat:13:ggmlv3:q4_K_S,0.0,0.625,1.0,0.0,0.0,0.0,0.75,0.6666666666666666,0.3802083333333333 -llama-2-chat:13:ggmlv3:q6_K,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667 -llama-2-chat:13:ggmlv3:q4_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667 -llama-2-chat:13:ggmlv3:q8_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667 -llama-2-chat:7:ggmlv3:q6_K,0.0,0.65625,1.0,0.2,0.0,0.0,0.5,0.5,0.35703125 -mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,0.71875,0.3333333333333333,0.0,0.0,0.0,0.75,1.0,0.35026041666666663 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,0.75,0.4166666666666667,0.0,0.0,0.0,1.0,0.625,0.34895833333333337 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,0.75,0.1666666666666666,0.2,0.0,0.0,1.0,0.6666666666666666,0.34791666666666665 -llama-2-chat:13:ggmlv3:q2_K,0.0,0.5625,0.8333333333333334,0.0,0.0,0.0,1.0,,0.34226190476190477 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,0.6875,0.8333333333333334,0.0,0.0,0.0,0.5,0.6666666666666666,0.3359375 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,0.75,0.0833333333333333,0.4,0.25,0.0,0.5,0.6666666666666666,0.33125 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,0.75,0.0833333333333333,0.0,0.0,0.0,0.75,0.6666666666666666,0.28125 +Model name,Size,Version,Quantisation,Mean,SD +gpt-3.5-turbo,,,,0.8777573529411764,0.1582457961403944 +llama-2-chat,7,ggmlv3,q3_K_M,0.51640625,0.3396479320906245 +llama-2-chat,7,ggmlv3,q5_0,0.4864583333333333,0.3166101237896715 +llama-2-chat,7,ggmlv3,q5_K_M,0.465625,0.37768569522156914 +llama-2-chat,13,ggmlv3,q5_0,0.4609375,0.5330278157317402 +llama-2-chat,7,ggmlv3,q4_0,0.45,0.39370039370059057 +llama-2-chat,7,ggmlv3,q4_K_S,0.4447916666666667,0.37288611373909386 +llama-2-chat,13,ggmlv3,q4_1,0.4296875,0.4754084387069186 +llama-2-chat,7,ggmlv3,q4_1,0.415625,0.34976275441361676 +llama-2-chat,13,ggmlv3,q4_0,0.4078125,0.3993616732909531 +llama-2-chat,7,ggmlv3,q4_K_M,0.4075520833333333,0.347885380173866 +llama-2-chat,7,ggmlv3,q8_0,0.403125,0.3447003907381017 +llama-2-chat,13,ggmlv3,q5_K_M,0.3984375,0.4261215669779576 +llama-2-chat,13,ggmlv3,q4_K_S,0.3802083333333333,0.3938753658448881 +llama-2-chat,13,ggmlv3,q8_0,0.3776041666666667,0.3994404587939581 +llama-2-chat,13,ggmlv3,q4_K_M,0.3776041666666667,0.3994404587939581 +llama-2-chat,13,ggmlv3,q6_K,0.3776041666666667,0.3994404587939581 +llama-2-chat,7,ggmlv3,q6_K,0.35703125,0.3447632438876533 +mixtral-instruct-v0.1,46_7,ggufv2,Q2_K,0.35026041666666663,0.3887084057720928 +mixtral-instruct-v0.1,46_7,ggufv2,Q4_0,0.34895833333333337,0.379565666895876 +mixtral-instruct-v0.1,46_7,ggufv2,Q4_K_M,0.34791666666666665,0.3718793767249447 +llama-2-chat,13,ggmlv3,q3_K_M,0.3359375,0.34617790000931764 +mixtral-instruct-v0.1,46_7,ggufv2,Q5_0,0.33125,0.2758116179770372 +llama-2-chat,7,ggmlv3,q2_K,0.325765931372549,0.29627404541647706 +llama-2-chat,13,ggmlv3,q2_K,0.2994791666666667,0.4020802973762759 +mixtral-instruct-v0.1,46_7,ggufv2,Q8_0,0.28125,0.3434341983715528 diff --git a/benchmark/results/preprocessed_for_frontend/property_exists.csv b/benchmark/results/preprocessed_for_frontend/property_exists.csv index c95820a8..adc4483c 100644 --- a/benchmark/results/preprocessed_for_frontend/property_exists.csv +++ b/benchmark/results/preprocessed_for_frontend/property_exists.csv @@ -1,5 +1,4 @@ Model name,Passed test cases,Total test cases,Score,Iterations -llama-2-chat:7:ggmlv3:q2_K,1.0,0.0,inf,2 llama-2-chat:13:ggmlv3:q5_0,1.5,1.0,1.5,2 llama-2-chat:13:ggmlv3:q4_1,2.5,2.0,1.25,2 gpt-3.5-turbo,4.0,4.0,1.0,2 @@ -8,20 +7,21 @@ llama-2-chat:13:ggmlv3:q5_K_M,3.0,3.0,1.0,2 llama-2-chat:7:ggmlv3:q4_0,4.0,4.0,1.0,2 llama-2-chat:13:ggmlv3:q4_0,1.0,1.0,1.0,2 llama-2-chat:7:ggmlv3:q3_K_M,3.5,4.0,0.875,2 -llama-2-chat:13:ggmlv3:q8_0,2.5,3.0,0.8333333333333334,2 -llama-2-chat:13:ggmlv3:q4_K_M,2.5,3.0,0.8333333333333334,2 llama-2-chat:7:ggmlv3:q4_K_S,2.5,3.0,0.8333333333333334,2 llama-2-chat:13:ggmlv3:q6_K,2.5,3.0,0.8333333333333334,2 +llama-2-chat:13:ggmlv3:q8_0,2.5,3.0,0.8333333333333334,2 +llama-2-chat:13:ggmlv3:q4_K_M,2.5,3.0,0.8333333333333334,2 llama-2-chat:7:ggmlv3:q5_K_M,3.0,4.0,0.75,2 -llama-2-chat:13:ggmlv3:q4_K_S,2.0,3.0,0.6666666666666666,2 +llama-2-chat:7:ggmlv3:q5_0,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,3.0,0.6666666666666666,2 -llama-2-chat:7:ggmlv3:q5_0,2.0,3.0,0.6666666666666666,2 -llama-2-chat:7:ggmlv3:q4_K_M,2.0,3.0,0.6666666666666666,2 llama-2-chat:13:ggmlv3:q3_K_M,2.0,3.0,0.6666666666666666,2 +llama-2-chat:13:ggmlv3:q4_K_S,2.0,3.0,0.6666666666666666,2 +llama-2-chat:7:ggmlv3:q4_K_M,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.5,4.0,0.625,2 llama-2-chat:7:ggmlv3:q4_1,3.0,5.0,0.6,2 llama-2-chat:7:ggmlv3:q6_K,2.0,4.0,0.5,2 llama-2-chat:7:ggmlv3:q8_0,2.5,5.0,0.5,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,0.0,,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,0.0,0.0,2 +llama-2-chat:7:ggmlv3:q2_K,1.0,0.0,0.0,2 diff --git a/docs/scripts/hooks.py b/docs/scripts/hooks.py index 88f4c115..3c984a7e 100644 --- a/docs/scripts/hooks.py +++ b/docs/scripts/hooks.py @@ -66,9 +66,13 @@ def preprocess_results_for_frontend( } ) - aggregated_scores["Score"] = ( - aggregated_scores["passed_test_cases"] - / aggregated_scores["number_test_cases"] + aggregated_scores["Score"] = aggregated_scores.apply( + lambda row: ( + row["passed_test_cases"] / row["number_test_cases"] + if row["number_test_cases"] != 0 + else 0 + ), + axis=1, ) aggregated_scores["Model name"] = aggregated_scores.index.get_level_values( @@ -118,12 +122,36 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]): subtask_results.append(subtask_result) overview = pd.concat(subtask_results, axis=1) overview["Mean"] = overview.mean(axis=1) + overview["SD"] = overview.std(axis=1) overview = overview.sort_values(by="Mean", ascending=False) + # split "Model name" at : to get Model name, size, version, and quantisation + overview["Model name"] = overview.index + overview[["Model name", "Size", "Version", "Quantisation"]] = overview[ + "Model name" + ].str.split(":", expand=True) + overview = overview[ + [ + "Model name", + "Size", + "Version", + "Quantisation", + "Mean", + "SD", + ] + ] overview.to_csv( - f"{result_files_path}preprocessed_for_frontend/overview.csv", index=True + f"{result_files_path}preprocessed_for_frontend/overview.csv", + index=False, ) - overview_aggregated = overview[["Mean"]] + + overview_aggregated = overview[ + ["Model name", "Size", "Quantisation", "Mean"] + ] overview_aggregated.to_csv( f"{result_files_path}preprocessed_for_frontend/overview-aggregated.csv", - index=True, + index=False, ) + + +if __name__ == "__main__": + on_pre_build(None) From 79dbc93af7988e14a62fd975d87d4ebe4eff17ae Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Sat, 3 Feb 2024 20:38:21 +0100 Subject: [PATCH 23/32] add SD to aggregated table --- docs/scripts/hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/scripts/hooks.py b/docs/scripts/hooks.py index 3c984a7e..0546a6ec 100644 --- a/docs/scripts/hooks.py +++ b/docs/scripts/hooks.py @@ -145,7 +145,7 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]): ) overview_aggregated = overview[ - ["Model name", "Size", "Quantisation", "Mean"] + ["Model name", "Size", "Quantisation", "Mean", "SD"] ] overview_aggregated.to_csv( f"{result_files_path}preprocessed_for_frontend/overview-aggregated.csv", From 27c5a4dbb97eb7b9d7491f8d9abe0b2a7e2ab289 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Sat, 3 Feb 2024 20:42:33 +0100 Subject: [PATCH 24/32] round mean values --- .../overview-aggregated.csv | 54 +++++++++---------- .../preprocessed_for_frontend/overview.csv | 50 ++++++++--------- docs/benchmark-overview.md | 2 +- docs/scripts/hooks.py | 3 ++ 4 files changed, 56 insertions(+), 53 deletions(-) diff --git a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv index 8baec328..7f3da16d 100644 --- a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv +++ b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv @@ -1,27 +1,27 @@ -Model name,Size,Quantisation,Mean -gpt-3.5-turbo,,,0.8777573529411764 -llama-2-chat,7,q3_K_M,0.51640625 -llama-2-chat,7,q5_0,0.4864583333333333 -llama-2-chat,7,q5_K_M,0.465625 -llama-2-chat,13,q5_0,0.4609375 -llama-2-chat,7,q4_0,0.45 -llama-2-chat,7,q4_K_S,0.4447916666666667 -llama-2-chat,13,q4_1,0.4296875 -llama-2-chat,7,q4_1,0.415625 -llama-2-chat,13,q4_0,0.4078125 -llama-2-chat,7,q4_K_M,0.4075520833333333 -llama-2-chat,7,q8_0,0.403125 -llama-2-chat,13,q5_K_M,0.3984375 -llama-2-chat,13,q4_K_S,0.3802083333333333 -llama-2-chat,13,q8_0,0.3776041666666667 -llama-2-chat,13,q4_K_M,0.3776041666666667 -llama-2-chat,13,q6_K,0.3776041666666667 -llama-2-chat,7,q6_K,0.35703125 -mixtral-instruct-v0.1,46_7,Q2_K,0.35026041666666663 -mixtral-instruct-v0.1,46_7,Q4_0,0.34895833333333337 -mixtral-instruct-v0.1,46_7,Q4_K_M,0.34791666666666665 -llama-2-chat,13,q3_K_M,0.3359375 -mixtral-instruct-v0.1,46_7,Q5_0,0.33125 -llama-2-chat,7,q2_K,0.325765931372549 -llama-2-chat,13,q2_K,0.2994791666666667 -mixtral-instruct-v0.1,46_7,Q8_0,0.28125 +Model name,Size,Quantisation,Mean,SD +gpt-3.5-turbo,,,0.88,0.16 +llama-2-chat,7.0,q3_K_M,0.52,0.34 +llama-2-chat,7.0,q5_0,0.49,0.32 +llama-2-chat,7.0,q5_K_M,0.47,0.38 +llama-2-chat,13.0,q5_0,0.46,0.53 +llama-2-chat,7.0,q4_0,0.45,0.39 +llama-2-chat,7.0,q4_K_S,0.44,0.37 +llama-2-chat,13.0,q4_1,0.43,0.48 +llama-2-chat,7.0,q4_1,0.42,0.35 +llama-2-chat,13.0,q4_0,0.41,0.4 +llama-2-chat,7.0,q4_K_M,0.41,0.35 +llama-2-chat,7.0,q8_0,0.4,0.34 +llama-2-chat,13.0,q5_K_M,0.4,0.43 +llama-2-chat,13.0,q4_K_S,0.38,0.39 +llama-2-chat,13.0,q8_0,0.38,0.4 +llama-2-chat,13.0,q4_K_M,0.38,0.4 +llama-2-chat,13.0,q6_K,0.38,0.4 +llama-2-chat,7.0,q6_K,0.36,0.34 +mixtral-instruct-v0.1,467.0,Q2_K,0.35,0.39 +mixtral-instruct-v0.1,467.0,Q4_0,0.35,0.38 +mixtral-instruct-v0.1,467.0,Q4_K_M,0.35,0.37 +llama-2-chat,13.0,q3_K_M,0.34,0.35 +mixtral-instruct-v0.1,467.0,Q5_0,0.33,0.28 +llama-2-chat,7.0,q2_K,0.33,0.3 +llama-2-chat,13.0,q2_K,0.3,0.4 +mixtral-instruct-v0.1,467.0,Q8_0,0.28,0.34 diff --git a/benchmark/results/preprocessed_for_frontend/overview.csv b/benchmark/results/preprocessed_for_frontend/overview.csv index 88aa638c..31d5aeed 100644 --- a/benchmark/results/preprocessed_for_frontend/overview.csv +++ b/benchmark/results/preprocessed_for_frontend/overview.csv @@ -1,27 +1,27 @@ Model name,Size,Version,Quantisation,Mean,SD gpt-3.5-turbo,,,,0.8777573529411764,0.1582457961403944 -llama-2-chat,7,ggmlv3,q3_K_M,0.51640625,0.3396479320906245 -llama-2-chat,7,ggmlv3,q5_0,0.4864583333333333,0.3166101237896715 -llama-2-chat,7,ggmlv3,q5_K_M,0.465625,0.37768569522156914 -llama-2-chat,13,ggmlv3,q5_0,0.4609375,0.5330278157317402 -llama-2-chat,7,ggmlv3,q4_0,0.45,0.39370039370059057 -llama-2-chat,7,ggmlv3,q4_K_S,0.4447916666666667,0.37288611373909386 -llama-2-chat,13,ggmlv3,q4_1,0.4296875,0.4754084387069186 -llama-2-chat,7,ggmlv3,q4_1,0.415625,0.34976275441361676 -llama-2-chat,13,ggmlv3,q4_0,0.4078125,0.3993616732909531 -llama-2-chat,7,ggmlv3,q4_K_M,0.4075520833333333,0.347885380173866 -llama-2-chat,7,ggmlv3,q8_0,0.403125,0.3447003907381017 -llama-2-chat,13,ggmlv3,q5_K_M,0.3984375,0.4261215669779576 -llama-2-chat,13,ggmlv3,q4_K_S,0.3802083333333333,0.3938753658448881 -llama-2-chat,13,ggmlv3,q8_0,0.3776041666666667,0.3994404587939581 -llama-2-chat,13,ggmlv3,q4_K_M,0.3776041666666667,0.3994404587939581 -llama-2-chat,13,ggmlv3,q6_K,0.3776041666666667,0.3994404587939581 -llama-2-chat,7,ggmlv3,q6_K,0.35703125,0.3447632438876533 -mixtral-instruct-v0.1,46_7,ggufv2,Q2_K,0.35026041666666663,0.3887084057720928 -mixtral-instruct-v0.1,46_7,ggufv2,Q4_0,0.34895833333333337,0.379565666895876 -mixtral-instruct-v0.1,46_7,ggufv2,Q4_K_M,0.34791666666666665,0.3718793767249447 -llama-2-chat,13,ggmlv3,q3_K_M,0.3359375,0.34617790000931764 -mixtral-instruct-v0.1,46_7,ggufv2,Q5_0,0.33125,0.2758116179770372 -llama-2-chat,7,ggmlv3,q2_K,0.325765931372549,0.29627404541647706 -llama-2-chat,13,ggmlv3,q2_K,0.2994791666666667,0.4020802973762759 -mixtral-instruct-v0.1,46_7,ggufv2,Q8_0,0.28125,0.3434341983715528 +llama-2-chat,7.0,ggmlv3,q3_K_M,0.51640625,0.3396479320906245 +llama-2-chat,7.0,ggmlv3,q5_0,0.4864583333333333,0.3166101237896715 +llama-2-chat,7.0,ggmlv3,q5_K_M,0.465625,0.37768569522156914 +llama-2-chat,13.0,ggmlv3,q5_0,0.4609375,0.5330278157317402 +llama-2-chat,7.0,ggmlv3,q4_0,0.45,0.39370039370059057 +llama-2-chat,7.0,ggmlv3,q4_K_S,0.4447916666666667,0.37288611373909386 +llama-2-chat,13.0,ggmlv3,q4_1,0.4296875,0.4754084387069186 +llama-2-chat,7.0,ggmlv3,q4_1,0.415625,0.34976275441361676 +llama-2-chat,13.0,ggmlv3,q4_0,0.4078125,0.3993616732909531 +llama-2-chat,7.0,ggmlv3,q4_K_M,0.4075520833333333,0.347885380173866 +llama-2-chat,7.0,ggmlv3,q8_0,0.403125,0.3447003907381017 +llama-2-chat,13.0,ggmlv3,q5_K_M,0.3984375,0.4261215669779576 +llama-2-chat,13.0,ggmlv3,q4_K_S,0.3802083333333333,0.3938753658448881 +llama-2-chat,13.0,ggmlv3,q8_0,0.3776041666666667,0.3994404587939581 +llama-2-chat,13.0,ggmlv3,q4_K_M,0.3776041666666667,0.3994404587939581 +llama-2-chat,13.0,ggmlv3,q6_K,0.3776041666666667,0.3994404587939581 +llama-2-chat,7.0,ggmlv3,q6_K,0.35703125,0.3447632438876533 +mixtral-instruct-v0.1,467.0,ggufv2,Q2_K,0.35026041666666663,0.3887084057720928 +mixtral-instruct-v0.1,467.0,ggufv2,Q4_0,0.34895833333333337,0.379565666895876 +mixtral-instruct-v0.1,467.0,ggufv2,Q4_K_M,0.34791666666666665,0.3718793767249447 +llama-2-chat,13.0,ggmlv3,q3_K_M,0.3359375,0.34617790000931764 +mixtral-instruct-v0.1,467.0,ggufv2,Q5_0,0.33125,0.2758116179770372 +llama-2-chat,7.0,ggmlv3,q2_K,0.325765931372549,0.29627404541647706 +llama-2-chat,13.0,ggmlv3,q2_K,0.2994791666666667,0.4020802973762759 +mixtral-instruct-v0.1,467.0,ggufv2,Q8_0,0.28125,0.3434341983715528 diff --git a/docs/benchmark-overview.md b/docs/benchmark-overview.md index d63982b3..07471974 100644 --- a/docs/benchmark-overview.md +++ b/docs/benchmark-overview.md @@ -5,7 +5,7 @@ Table sorted by mean score in descending order. Click the column names to reorder. -{{ read_csv('benchmark/results/preprocessed_for_frontend/overview-aggregated.csv', colalign=("center","center")) }} +{{ read_csv('benchmark/results/preprocessed_for_frontend/overview-aggregated.csv', colalign=("center","center","center","center","center")) }} ## Including all tasks diff --git a/docs/scripts/hooks.py b/docs/scripts/hooks.py index 0546a6ec..596da8d8 100644 --- a/docs/scripts/hooks.py +++ b/docs/scripts/hooks.py @@ -147,6 +147,9 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]): overview_aggregated = overview[ ["Model name", "Size", "Quantisation", "Mean", "SD"] ] + # round mean and sd to 2 decimal places + overview_aggregated["Mean"] = overview_aggregated["Mean"].round(2) + overview_aggregated["SD"] = overview_aggregated["SD"].round(2) overview_aggregated.to_csv( f"{result_files_path}preprocessed_for_frontend/overview-aggregated.csv", index=False, From cafc91f1ba6199d714356caeddb2ad582074d872 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Sun, 4 Feb 2024 13:20:46 +0100 Subject: [PATCH 25/32] size decimal point --- .../overview-aggregated.csv | 52 +++++++++---------- .../preprocessed_for_frontend/overview.csv | 52 +++++++++---------- docs/scripts/hooks.py | 2 + 3 files changed, 54 insertions(+), 52 deletions(-) diff --git a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv index 7f3da16d..0a7b44d4 100644 --- a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv +++ b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv @@ -1,27 +1,27 @@ Model name,Size,Quantisation,Mean,SD -gpt-3.5-turbo,,,0.88,0.16 -llama-2-chat,7.0,q3_K_M,0.52,0.34 -llama-2-chat,7.0,q5_0,0.49,0.32 -llama-2-chat,7.0,q5_K_M,0.47,0.38 -llama-2-chat,13.0,q5_0,0.46,0.53 -llama-2-chat,7.0,q4_0,0.45,0.39 -llama-2-chat,7.0,q4_K_S,0.44,0.37 -llama-2-chat,13.0,q4_1,0.43,0.48 -llama-2-chat,7.0,q4_1,0.42,0.35 -llama-2-chat,13.0,q4_0,0.41,0.4 -llama-2-chat,7.0,q4_K_M,0.41,0.35 -llama-2-chat,7.0,q8_0,0.4,0.34 -llama-2-chat,13.0,q5_K_M,0.4,0.43 -llama-2-chat,13.0,q4_K_S,0.38,0.39 -llama-2-chat,13.0,q8_0,0.38,0.4 -llama-2-chat,13.0,q4_K_M,0.38,0.4 -llama-2-chat,13.0,q6_K,0.38,0.4 -llama-2-chat,7.0,q6_K,0.36,0.34 -mixtral-instruct-v0.1,467.0,Q2_K,0.35,0.39 -mixtral-instruct-v0.1,467.0,Q4_0,0.35,0.38 -mixtral-instruct-v0.1,467.0,Q4_K_M,0.35,0.37 -llama-2-chat,13.0,q3_K_M,0.34,0.35 -mixtral-instruct-v0.1,467.0,Q5_0,0.33,0.28 -llama-2-chat,7.0,q2_K,0.33,0.3 -llama-2-chat,13.0,q2_K,0.3,0.4 -mixtral-instruct-v0.1,467.0,Q8_0,0.28,0.34 +gpt-3.5-turbo,NA,NA,0.88,0.16 +llama-2-chat,7,q3_K_M,0.52,0.34 +llama-2-chat,7,q5_0,0.49,0.32 +llama-2-chat,7,q5_K_M,0.47,0.38 +llama-2-chat,13,q5_0,0.46,0.53 +llama-2-chat,7,q4_0,0.45,0.39 +llama-2-chat,7,q4_K_S,0.44,0.37 +llama-2-chat,13,q4_1,0.43,0.48 +llama-2-chat,7,q4_1,0.42,0.35 +llama-2-chat,13,q4_0,0.41,0.4 +llama-2-chat,7,q4_K_M,0.41,0.35 +llama-2-chat,7,q8_0,0.4,0.34 +llama-2-chat,13,q5_K_M,0.4,0.43 +llama-2-chat,13,q4_K_S,0.38,0.39 +llama-2-chat,13,q8_0,0.38,0.4 +llama-2-chat,13,q4_K_M,0.38,0.4 +llama-2-chat,13,q6_K,0.38,0.4 +llama-2-chat,7,q6_K,0.36,0.34 +mixtral-instruct-v0.1,"46,7",Q2_K,0.35,0.39 +mixtral-instruct-v0.1,"46,7",Q4_0,0.35,0.38 +mixtral-instruct-v0.1,"46,7",Q4_K_M,0.35,0.37 +llama-2-chat,13,q3_K_M,0.34,0.35 +mixtral-instruct-v0.1,"46,7",Q5_0,0.33,0.28 +llama-2-chat,7,q2_K,0.33,0.3 +llama-2-chat,13,q2_K,0.3,0.4 +mixtral-instruct-v0.1,"46,7",Q8_0,0.28,0.34 diff --git a/benchmark/results/preprocessed_for_frontend/overview.csv b/benchmark/results/preprocessed_for_frontend/overview.csv index 31d5aeed..78942d0a 100644 --- a/benchmark/results/preprocessed_for_frontend/overview.csv +++ b/benchmark/results/preprocessed_for_frontend/overview.csv @@ -1,27 +1,27 @@ Model name,Size,Version,Quantisation,Mean,SD -gpt-3.5-turbo,,,,0.8777573529411764,0.1582457961403944 -llama-2-chat,7.0,ggmlv3,q3_K_M,0.51640625,0.3396479320906245 -llama-2-chat,7.0,ggmlv3,q5_0,0.4864583333333333,0.3166101237896715 -llama-2-chat,7.0,ggmlv3,q5_K_M,0.465625,0.37768569522156914 -llama-2-chat,13.0,ggmlv3,q5_0,0.4609375,0.5330278157317402 -llama-2-chat,7.0,ggmlv3,q4_0,0.45,0.39370039370059057 -llama-2-chat,7.0,ggmlv3,q4_K_S,0.4447916666666667,0.37288611373909386 -llama-2-chat,13.0,ggmlv3,q4_1,0.4296875,0.4754084387069186 -llama-2-chat,7.0,ggmlv3,q4_1,0.415625,0.34976275441361676 -llama-2-chat,13.0,ggmlv3,q4_0,0.4078125,0.3993616732909531 -llama-2-chat,7.0,ggmlv3,q4_K_M,0.4075520833333333,0.347885380173866 -llama-2-chat,7.0,ggmlv3,q8_0,0.403125,0.3447003907381017 -llama-2-chat,13.0,ggmlv3,q5_K_M,0.3984375,0.4261215669779576 -llama-2-chat,13.0,ggmlv3,q4_K_S,0.3802083333333333,0.3938753658448881 -llama-2-chat,13.0,ggmlv3,q8_0,0.3776041666666667,0.3994404587939581 -llama-2-chat,13.0,ggmlv3,q4_K_M,0.3776041666666667,0.3994404587939581 -llama-2-chat,13.0,ggmlv3,q6_K,0.3776041666666667,0.3994404587939581 -llama-2-chat,7.0,ggmlv3,q6_K,0.35703125,0.3447632438876533 -mixtral-instruct-v0.1,467.0,ggufv2,Q2_K,0.35026041666666663,0.3887084057720928 -mixtral-instruct-v0.1,467.0,ggufv2,Q4_0,0.34895833333333337,0.379565666895876 -mixtral-instruct-v0.1,467.0,ggufv2,Q4_K_M,0.34791666666666665,0.3718793767249447 -llama-2-chat,13.0,ggmlv3,q3_K_M,0.3359375,0.34617790000931764 -mixtral-instruct-v0.1,467.0,ggufv2,Q5_0,0.33125,0.2758116179770372 -llama-2-chat,7.0,ggmlv3,q2_K,0.325765931372549,0.29627404541647706 -llama-2-chat,13.0,ggmlv3,q2_K,0.2994791666666667,0.4020802973762759 -mixtral-instruct-v0.1,467.0,ggufv2,Q8_0,0.28125,0.3434341983715528 +gpt-3.5-turbo,NA,,NA,0.8777573529411764,0.1582457961403944 +llama-2-chat,7,ggmlv3,q3_K_M,0.51640625,0.3396479320906245 +llama-2-chat,7,ggmlv3,q5_0,0.4864583333333333,0.3166101237896715 +llama-2-chat,7,ggmlv3,q5_K_M,0.465625,0.37768569522156914 +llama-2-chat,13,ggmlv3,q5_0,0.4609375,0.5330278157317402 +llama-2-chat,7,ggmlv3,q4_0,0.45,0.39370039370059057 +llama-2-chat,7,ggmlv3,q4_K_S,0.4447916666666667,0.37288611373909386 +llama-2-chat,13,ggmlv3,q4_1,0.4296875,0.4754084387069186 +llama-2-chat,7,ggmlv3,q4_1,0.415625,0.34976275441361676 +llama-2-chat,13,ggmlv3,q4_0,0.4078125,0.3993616732909531 +llama-2-chat,7,ggmlv3,q4_K_M,0.4075520833333333,0.347885380173866 +llama-2-chat,7,ggmlv3,q8_0,0.403125,0.3447003907381017 +llama-2-chat,13,ggmlv3,q5_K_M,0.3984375,0.4261215669779576 +llama-2-chat,13,ggmlv3,q4_K_S,0.3802083333333333,0.3938753658448881 +llama-2-chat,13,ggmlv3,q8_0,0.3776041666666667,0.3994404587939581 +llama-2-chat,13,ggmlv3,q4_K_M,0.3776041666666667,0.3994404587939581 +llama-2-chat,13,ggmlv3,q6_K,0.3776041666666667,0.3994404587939581 +llama-2-chat,7,ggmlv3,q6_K,0.35703125,0.3447632438876533 +mixtral-instruct-v0.1,"46,7",ggufv2,Q2_K,0.35026041666666663,0.3887084057720928 +mixtral-instruct-v0.1,"46,7",ggufv2,Q4_0,0.34895833333333337,0.379565666895876 +mixtral-instruct-v0.1,"46,7",ggufv2,Q4_K_M,0.34791666666666665,0.3718793767249447 +llama-2-chat,13,ggmlv3,q3_K_M,0.3359375,0.34617790000931764 +mixtral-instruct-v0.1,"46,7",ggufv2,Q5_0,0.33125,0.2758116179770372 +llama-2-chat,7,ggmlv3,q2_K,0.325765931372549,0.29627404541647706 +llama-2-chat,13,ggmlv3,q2_K,0.2994791666666667,0.4020802973762759 +mixtral-instruct-v0.1,"46,7",ggufv2,Q8_0,0.28125,0.3434341983715528 diff --git a/docs/scripts/hooks.py b/docs/scripts/hooks.py index 596da8d8..a6deac9b 100644 --- a/docs/scripts/hooks.py +++ b/docs/scripts/hooks.py @@ -129,6 +129,8 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]): overview[["Model name", "Size", "Version", "Quantisation"]] = overview[ "Model name" ].str.split(":", expand=True) + # convert underscores in Size to commas + overview["Size"] = overview["Size"].str.replace("_", ",") overview = overview[ [ "Model name", From 6c146879c571a18d4774e54b00d8a2a12a964e48 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Sun, 4 Feb 2024 13:25:07 +0100 Subject: [PATCH 26/32] run gpt4 benchmark --- benchmark/results/end_to_end_query_generation.csv | 2 ++ benchmark/results/entity_selection.csv | 2 ++ .../results/explicit_relevance_of_single_fragments.csv | 6 ++++++ .../results/implicit_relevance_of_multiple_fragments.csv | 2 ++ benchmark/results/property_exists.csv | 2 ++ benchmark/results/property_selection.csv | 2 ++ benchmark/results/query_generation.csv | 2 ++ benchmark/results/relationship_selection.csv | 2 ++ 8 files changed, 20 insertions(+) diff --git a/benchmark/results/end_to_end_query_generation.csv b/benchmark/results/end_to_end_query_generation.csv index d9ec6dff..a6fbfff2 100644 --- a/benchmark/results/end_to_end_query_generation.csv +++ b/benchmark/results/end_to_end_query_generation.csv @@ -1,6 +1,8 @@ model_name,subtask,score,iterations,md5_hash gpt-3.5-turbo,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +gpt-4,multi_word,5.5/8,2,f29b6faf7d003159d43a5d1cf451587f +gpt-4,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/benchmark/results/entity_selection.csv b/benchmark/results/entity_selection.csv index 86385748..3cf47398 100644 --- a/benchmark/results/entity_selection.csv +++ b/benchmark/results/entity_selection.csv @@ -1,6 +1,8 @@ model_name,subtask,score,iterations,md5_hash gpt-3.5-turbo,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec +gpt-4,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +gpt-4,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv index 49b5ad8e..9a3a604d 100644 --- a/benchmark/results/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -5,6 +5,12 @@ gpt-3.5-turbo,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3 gpt-3.5-turbo,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 gpt-3.5-turbo,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 gpt-3.5-turbo,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +gpt-4,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +gpt-4,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +gpt-4,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +gpt-4,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +gpt-4,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +gpt-4,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 llama-2-chat:13:ggmlv3:q2_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 llama-2-chat:13:ggmlv3:q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 llama-2-chat:13:ggmlv3:q2_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv index 1d800940..43051e43 100644 --- a/benchmark/results/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/implicit_relevance_of_multiple_fragments.csv @@ -1,6 +1,8 @@ model_name,subtask,score,iterations,md5_hash gpt-3.5-turbo,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 gpt-3.5-turbo,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +gpt-4,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +gpt-4,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 llama-2-chat:13:ggmlv3:q2_K,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 llama-2-chat:13:ggmlv3:q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 llama-2-chat:13:ggmlv3:q3_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 diff --git a/benchmark/results/property_exists.csv b/benchmark/results/property_exists.csv index 96671ec9..2d6f1eef 100644 --- a/benchmark/results/property_exists.csv +++ b/benchmark/results/property_exists.csv @@ -1,6 +1,8 @@ model_name,subtask,score,iterations,md5_hash gpt-3.5-turbo,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec +gpt-4,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +gpt-4,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/0,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/0,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q3_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/benchmark/results/property_selection.csv b/benchmark/results/property_selection.csv index 03475ee5..58b71a5a 100644 --- a/benchmark/results/property_selection.csv +++ b/benchmark/results/property_selection.csv @@ -1,6 +1,8 @@ model_name,subtask,score,iterations,md5_hash gpt-3.5-turbo,multi_word,5.0/7,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,6.0/10,2,e447d738f5e035cde60d624eadb79fec +gpt-4,multi_word,7.0/7,2,f29b6faf7d003159d43a5d1cf451587f +gpt-4,single_word,6.0/10,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/benchmark/results/query_generation.csv b/benchmark/results/query_generation.csv index c4c2a37b..4bfc3407 100644 --- a/benchmark/results/query_generation.csv +++ b/benchmark/results/query_generation.csv @@ -1,6 +1,8 @@ model_name,subtask,score,iterations,md5_hash gpt-3.5-turbo,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +gpt-4,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +gpt-4,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q2_K,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggmlv3:q2_K,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q3_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/benchmark/results/relationship_selection.csv b/benchmark/results/relationship_selection.csv index 9d75eed5..162e861b 100644 --- a/benchmark/results/relationship_selection.csv +++ b/benchmark/results/relationship_selection.csv @@ -1,6 +1,8 @@ model_name,subtask,score,iterations,md5_hash gpt-3.5-turbo,multi_word,3.0/3,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec +gpt-4,multi_word,3.0/3,2,f29b6faf7d003159d43a5d1cf451587f +gpt-4,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f From 0f6e98edf0d5d8036df629ccc1136ee0ed4de3ee Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Sun, 4 Feb 2024 13:30:38 +0100 Subject: [PATCH 27/32] reintroduce single tasks in overview (non aggregated) --- .../end_to_end_query_generation.csv | 5 +- .../entity_selection.csv | 11 ++-- ...explicit_relevance_of_single_fragments.csv | 5 +- ...plicit_relevance_of_multiple_fragments.csv | 5 +- .../overview-aggregated.csv | 55 ++++++++++--------- .../preprocessed_for_frontend/overview.csv | 55 ++++++++++--------- .../property_exists.csv | 11 ++-- .../property_selection.csv | 5 +- .../query_generation.csv | 13 +++-- .../relationship_selection.csv | 13 +++-- docs/scripts/hooks.py | 28 +++++----- 11 files changed, 108 insertions(+), 98 deletions(-) diff --git a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv index c2ab0bb7..ba143461 100644 --- a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv @@ -1,6 +1,7 @@ Model name,Passed test cases,Total test cases,Score,Iterations +gpt-4,11.5,16.0,0.71875,2 gpt-3.5-turbo,11.0,16.0,0.6875,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,16.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_0,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,16.0,0.0,2 @@ -12,7 +13,6 @@ llama-2-chat:7:ggmlv3:q5_0,0.0,16.0,0.0,2 llama-2-chat:7:ggmlv3:q4_K_S,0.0,16.0,0.0,2 llama-2-chat:7:ggmlv3:q4_K_M,0.0,16.0,0.0,2 llama-2-chat:7:ggmlv3:q4_1,0.0,16.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,16.0,0.0,2 llama-2-chat:7:ggmlv3:q3_K_M,0.0,16.0,0.0,2 llama-2-chat:7:ggmlv3:q2_K,0.0,16.0,0.0,2 llama-2-chat:13:ggmlv3:q8_0,0.0,16.0,0.0,2 @@ -24,4 +24,5 @@ llama-2-chat:13:ggmlv3:q4_K_M,0.0,16.0,0.0,2 llama-2-chat:13:ggmlv3:q4_1,0.0,16.0,0.0,2 llama-2-chat:13:ggmlv3:q4_0,0.0,16.0,0.0,2 llama-2-chat:13:ggmlv3:q3_K_M,0.0,16.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,16.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/entity_selection.csv b/benchmark/results/preprocessed_for_frontend/entity_selection.csv index 52482637..bd93b80f 100644 --- a/benchmark/results/preprocessed_for_frontend/entity_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/entity_selection.csv @@ -1,27 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,4.0,4.0,1.0,2 +gpt-4,4.0,4.0,1.0,2 llama-2-chat:7:ggmlv3:q2_K,3.0,5.0,0.6,2 llama-2-chat:7:ggmlv3:q8_0,3.0,5.0,0.6,2 llama-2-chat:7:ggmlv3:q5_K_M,3.0,5.0,0.6,2 llama-2-chat:7:ggmlv3:q5_0,3.0,5.0,0.6,2 llama-2-chat:7:ggmlv3:q4_K_S,3.0,5.0,0.6,2 llama-2-chat:7:ggmlv3:q4_1,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q3_K_M,3.0,5.0,0.6,2 llama-2-chat:7:ggmlv3:q4_0,3.0,5.0,0.6,2 +llama-2-chat:7:ggmlv3:q3_K_M,3.0,5.0,0.6,2 llama-2-chat:7:ggmlv3:q4_K_M,2.5,5.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,5.0,0.4,2 llama-2-chat:13:ggmlv3:q4_0,1.0,5.0,0.2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,1.0,5.0,0.2,2 llama-2-chat:7:ggmlv3:q6_K,1.0,5.0,0.2,2 -llama-2-chat:13:ggmlv3:q6_K,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,5.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,5.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_1,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q6_K,0.0,5.0,0.0,2 llama-2-chat:13:ggmlv3:q8_0,0.0,5.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_1,0.0,5.0,0.0,2 llama-2-chat:13:ggmlv3:q4_K_M,0.0,5.0,0.0,2 llama-2-chat:13:ggmlv3:q4_K_S,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,5.0,0.0,2 llama-2-chat:13:ggmlv3:q5_0,0.0,5.0,0.0,2 llama-2-chat:13:ggmlv3:q5_K_M,0.0,5.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,5.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv index 614a0acd..6c2b93eb 100644 --- a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv @@ -8,8 +8,9 @@ llama-2-chat:7:ggmlv3:q5_0,6.0,6.0,1.0,2 llama-2-chat:7:ggmlv3:q4_K_S,6.0,6.0,1.0,2 llama-2-chat:7:ggmlv3:q4_K_M,6.0,6.0,1.0,2 llama-2-chat:7:ggmlv3:q4_1,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q3_K_M,6.0,6.0,1.0,2 llama-2-chat:7:ggmlv3:q4_0,6.0,6.0,1.0,2 +gpt-4,6.0,6.0,1.0,2 +llama-2-chat:7:ggmlv3:q3_K_M,6.0,6.0,1.0,2 llama-2-chat:13:ggmlv3:q6_K,6.0,6.0,1.0,2 llama-2-chat:13:ggmlv3:q5_K_M,6.0,6.0,1.0,2 llama-2-chat:13:ggmlv3:q5_0,6.0,6.0,1.0,2 @@ -18,8 +19,8 @@ llama-2-chat:13:ggmlv3:q4_K_M,6.0,6.0,1.0,2 llama-2-chat:13:ggmlv3:q4_1,6.0,6.0,1.0,2 llama-2-chat:13:ggmlv3:q4_0,6.0,6.0,1.0,2 llama-2-chat:7:ggmlv3:q2_K,5.0,6.0,0.8333333333333334,2 -llama-2-chat:13:ggmlv3:q2_K,5.0,6.0,0.8333333333333334,2 llama-2-chat:13:ggmlv3:q3_K_M,5.0,6.0,0.8333333333333334,2 +llama-2-chat:13:ggmlv3:q2_K,5.0,6.0,0.8333333333333334,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.5,6.0,0.4166666666666667,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,2.0,6.0,0.3333333333333333,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,1.0,6.0,0.16666666666666666,2 diff --git a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv index 63101439..aeed7928 100644 --- a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv @@ -1,5 +1,6 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,2.0,2.0,1.0,2 +gpt-4,2.0,2.0,1.0,2 llama-2-chat:13:ggmlv3:q2_K,2.0,2.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,2.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.0,2.0,1.0,2 @@ -20,8 +21,8 @@ llama-2-chat:7:ggmlv3:q4_K_S,1.0,2.0,0.5,2 llama-2-chat:13:ggmlv3:q5_K_M,1.0,2.0,0.5,2 llama-2-chat:7:ggmlv3:q4_K_M,1.0,2.0,0.5,2 llama-2-chat:7:ggmlv3:q4_1,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q3_K_M,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q4_0,1.0,2.0,0.5,2 llama-2-chat:7:ggmlv3:q2_K,1.0,2.0,0.5,2 llama-2-chat:13:ggmlv3:q8_0,1.0,2.0,0.5,2 llama-2-chat:13:ggmlv3:q6_K,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q4_0,1.0,2.0,0.5,2 +llama-2-chat:7:ggmlv3:q3_K_M,1.0,2.0,0.5,2 diff --git a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv index 0a7b44d4..e841ef0b 100644 --- a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv +++ b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv @@ -1,27 +1,28 @@ -Model name,Size,Quantisation,Mean,SD -gpt-3.5-turbo,NA,NA,0.88,0.16 -llama-2-chat,7,q3_K_M,0.52,0.34 -llama-2-chat,7,q5_0,0.49,0.32 -llama-2-chat,7,q5_K_M,0.47,0.38 -llama-2-chat,13,q5_0,0.46,0.53 -llama-2-chat,7,q4_0,0.45,0.39 -llama-2-chat,7,q4_K_S,0.44,0.37 -llama-2-chat,13,q4_1,0.43,0.48 -llama-2-chat,7,q4_1,0.42,0.35 -llama-2-chat,13,q4_0,0.41,0.4 -llama-2-chat,7,q4_K_M,0.41,0.35 -llama-2-chat,7,q8_0,0.4,0.34 -llama-2-chat,13,q5_K_M,0.4,0.43 -llama-2-chat,13,q4_K_S,0.38,0.39 -llama-2-chat,13,q8_0,0.38,0.4 -llama-2-chat,13,q4_K_M,0.38,0.4 -llama-2-chat,13,q6_K,0.38,0.4 -llama-2-chat,7,q6_K,0.36,0.34 -mixtral-instruct-v0.1,"46,7",Q2_K,0.35,0.39 -mixtral-instruct-v0.1,"46,7",Q4_0,0.35,0.38 -mixtral-instruct-v0.1,"46,7",Q4_K_M,0.35,0.37 -llama-2-chat,13,q3_K_M,0.34,0.35 -mixtral-instruct-v0.1,"46,7",Q5_0,0.33,0.28 -llama-2-chat,7,q2_K,0.33,0.3 -llama-2-chat,13,q2_K,0.3,0.4 -mixtral-instruct-v0.1,"46,7",Q8_0,0.28,0.34 +Model name,Size,Version,Quantisation,Mean,SD +gpt-4,,,,0.9,0.12 +gpt-3.5-turbo,,,,0.88,0.16 +llama-2-chat,7,ggmlv3,q3_K_M,0.52,0.34 +llama-2-chat,7,ggmlv3,q5_0,0.49,0.32 +llama-2-chat,7,ggmlv3,q5_K_M,0.47,0.38 +llama-2-chat,13,ggmlv3,q5_0,0.46,0.53 +llama-2-chat,7,ggmlv3,q4_0,0.45,0.39 +llama-2-chat,7,ggmlv3,q4_K_S,0.44,0.37 +llama-2-chat,13,ggmlv3,q4_1,0.43,0.48 +llama-2-chat,7,ggmlv3,q4_1,0.42,0.35 +llama-2-chat,13,ggmlv3,q4_0,0.41,0.4 +llama-2-chat,7,ggmlv3,q4_K_M,0.41,0.35 +llama-2-chat,7,ggmlv3,q8_0,0.4,0.34 +llama-2-chat,13,ggmlv3,q5_K_M,0.4,0.43 +llama-2-chat,13,ggmlv3,q4_K_S,0.38,0.39 +llama-2-chat,13,ggmlv3,q8_0,0.38,0.4 +llama-2-chat,13,ggmlv3,q6_K,0.38,0.4 +llama-2-chat,13,ggmlv3,q4_K_M,0.38,0.4 +llama-2-chat,7,ggmlv3,q6_K,0.36,0.34 +mixtral-instruct-v0.1,"46,7",ggufv2,Q2_K,0.35,0.39 +mixtral-instruct-v0.1,"46,7",ggufv2,Q4_0,0.35,0.38 +mixtral-instruct-v0.1,"46,7",ggufv2,Q4_K_M,0.35,0.37 +llama-2-chat,13,ggmlv3,q3_K_M,0.34,0.35 +mixtral-instruct-v0.1,"46,7",ggufv2,Q5_0,0.33,0.28 +llama-2-chat,7,ggmlv3,q2_K,0.33,0.3 +llama-2-chat,13,ggmlv3,q2_K,0.3,0.4 +mixtral-instruct-v0.1,"46,7",ggufv2,Q8_0,0.28,0.34 diff --git a/benchmark/results/preprocessed_for_frontend/overview.csv b/benchmark/results/preprocessed_for_frontend/overview.csv index 78942d0a..58cd4ccb 100644 --- a/benchmark/results/preprocessed_for_frontend/overview.csv +++ b/benchmark/results/preprocessed_for_frontend/overview.csv @@ -1,27 +1,28 @@ -Model name,Size,Version,Quantisation,Mean,SD -gpt-3.5-turbo,NA,,NA,0.8777573529411764,0.1582457961403944 -llama-2-chat,7,ggmlv3,q3_K_M,0.51640625,0.3396479320906245 -llama-2-chat,7,ggmlv3,q5_0,0.4864583333333333,0.3166101237896715 -llama-2-chat,7,ggmlv3,q5_K_M,0.465625,0.37768569522156914 -llama-2-chat,13,ggmlv3,q5_0,0.4609375,0.5330278157317402 -llama-2-chat,7,ggmlv3,q4_0,0.45,0.39370039370059057 -llama-2-chat,7,ggmlv3,q4_K_S,0.4447916666666667,0.37288611373909386 -llama-2-chat,13,ggmlv3,q4_1,0.4296875,0.4754084387069186 -llama-2-chat,7,ggmlv3,q4_1,0.415625,0.34976275441361676 -llama-2-chat,13,ggmlv3,q4_0,0.4078125,0.3993616732909531 -llama-2-chat,7,ggmlv3,q4_K_M,0.4075520833333333,0.347885380173866 -llama-2-chat,7,ggmlv3,q8_0,0.403125,0.3447003907381017 -llama-2-chat,13,ggmlv3,q5_K_M,0.3984375,0.4261215669779576 -llama-2-chat,13,ggmlv3,q4_K_S,0.3802083333333333,0.3938753658448881 -llama-2-chat,13,ggmlv3,q8_0,0.3776041666666667,0.3994404587939581 -llama-2-chat,13,ggmlv3,q4_K_M,0.3776041666666667,0.3994404587939581 -llama-2-chat,13,ggmlv3,q6_K,0.3776041666666667,0.3994404587939581 -llama-2-chat,7,ggmlv3,q6_K,0.35703125,0.3447632438876533 -mixtral-instruct-v0.1,"46,7",ggufv2,Q2_K,0.35026041666666663,0.3887084057720928 -mixtral-instruct-v0.1,"46,7",ggufv2,Q4_0,0.34895833333333337,0.379565666895876 -mixtral-instruct-v0.1,"46,7",ggufv2,Q4_K_M,0.34791666666666665,0.3718793767249447 -llama-2-chat,13,ggmlv3,q3_K_M,0.3359375,0.34617790000931764 -mixtral-instruct-v0.1,"46,7",ggufv2,Q5_0,0.33125,0.2758116179770372 -llama-2-chat,7,ggmlv3,q2_K,0.325765931372549,0.29627404541647706 -llama-2-chat,13,ggmlv3,q2_K,0.2994791666666667,0.4020802973762759 -mixtral-instruct-v0.1,"46,7",ggufv2,Q8_0,0.28125,0.3434341983715528 +Model name,property_selection,query_generation,explicit_relevance_of_single_fragments,entity_selection,relationship_selection,end_to_end_query_generation,implicit_relevance_of_multiple_fragments,property_exists,Mean,SD +gpt-4,0.7647058823529411,0.75,1.0,1.0,1.0,0.71875,1.0,1.0,0.9041819852941176,0.12425585940001706 +gpt-3.5-turbo,0.6470588235294118,0.6875,1.0,1.0,1.0,0.6875,1.0,1.0,0.8777573529411764,0.1582457961403944 +llama-2-chat:7:ggmlv3:q3_K_M,0.0,0.65625,1.0,0.6,0.5,0.0,0.5,0.875,0.51640625,0.3396479320906245 +llama-2-chat:7:ggmlv3:q5_0,0.0,0.625,1.0,0.6,0.5,0.0,0.5,0.6666666666666666,0.4864583333333333,0.3166101237896715 +llama-2-chat:7:ggmlv3:q5_K_M,0.0,0.625,1.0,0.6,0.0,0.0,0.75,0.75,0.465625,0.37768569522156914 +llama-2-chat:13:ggmlv3:q5_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.5,0.4609375,0.5330278157317402 +llama-2-chat:7:ggmlv3:q4_0,0.0,0.5,1.0,0.6,0.0,0.0,0.5,1.0,0.45,0.39370039370059057 +llama-2-chat:7:ggmlv3:q4_K_S,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.8333333333333334,0.4447916666666667,0.37288611373909386 +llama-2-chat:13:ggmlv3:q4_1,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.25,0.4296875,0.4754084387069186 +llama-2-chat:7:ggmlv3:q4_1,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.6,0.415625,0.34976275441361676 +llama-2-chat:13:ggmlv3:q4_0,0.0,0.5625,1.0,0.2,0.0,0.0,0.5,1.0,0.4078125,0.3993616732909531 +llama-2-chat:7:ggmlv3:q4_K_M,0.0,0.59375,1.0,0.5,0.0,0.0,0.5,0.6666666666666666,0.4075520833333333,0.347885380173866 +llama-2-chat:7:ggmlv3:q8_0,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.5,0.403125,0.3447003907381017 +llama-2-chat:13:ggmlv3:q5_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.0,0.3984375,0.4261215669779576 +llama-2-chat:13:ggmlv3:q4_K_S,0.0,0.625,1.0,0.0,0.0,0.0,0.75,0.6666666666666666,0.3802083333333333,0.3938753658448881 +llama-2-chat:13:ggmlv3:q8_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 +llama-2-chat:13:ggmlv3:q6_K,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 +llama-2-chat:13:ggmlv3:q4_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 +llama-2-chat:7:ggmlv3:q6_K,0.0,0.65625,1.0,0.2,0.0,0.0,0.5,0.5,0.35703125,0.3447632438876533 +mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,0.71875,0.3333333333333333,0.0,0.0,0.0,0.75,1.0,0.35026041666666663,0.3887084057720928 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,0.75,0.4166666666666667,0.0,0.0,0.0,1.0,0.625,0.34895833333333337,0.379565666895876 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,0.75,0.1666666666666666,0.2,0.0,0.0,1.0,0.6666666666666666,0.34791666666666665,0.3718793767249447 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,0.6875,0.8333333333333334,0.0,0.0,0.0,0.5,0.6666666666666666,0.3359375,0.34617790000931764 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,0.75,0.0833333333333333,0.4,0.25,0.0,0.5,0.6666666666666666,0.33125,0.2758116179770372 +llama-2-chat:7:ggmlv3:q2_K,0.2352941176470588,0.4375,0.8333333333333334,0.6,0.0,0.0,0.5,0.0,0.325765931372549,0.29627404541647706 +llama-2-chat:13:ggmlv3:q2_K,0.0,0.5625,0.8333333333333334,0.0,0.0,0.0,1.0,0.0,0.2994791666666667,0.4020802973762759 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,0.75,0.0833333333333333,0.0,0.0,0.0,0.75,0.6666666666666666,0.28125,0.3434341983715528 diff --git a/benchmark/results/preprocessed_for_frontend/property_exists.csv b/benchmark/results/preprocessed_for_frontend/property_exists.csv index adc4483c..792b2e9a 100644 --- a/benchmark/results/preprocessed_for_frontend/property_exists.csv +++ b/benchmark/results/preprocessed_for_frontend/property_exists.csv @@ -3,21 +3,22 @@ llama-2-chat:13:ggmlv3:q5_0,1.5,1.0,1.5,2 llama-2-chat:13:ggmlv3:q4_1,2.5,2.0,1.25,2 gpt-3.5-turbo,4.0,4.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,2.0,2.0,1.0,2 -llama-2-chat:13:ggmlv3:q5_K_M,3.0,3.0,1.0,2 llama-2-chat:7:ggmlv3:q4_0,4.0,4.0,1.0,2 llama-2-chat:13:ggmlv3:q4_0,1.0,1.0,1.0,2 +gpt-4,4.0,4.0,1.0,2 +llama-2-chat:13:ggmlv3:q5_K_M,3.0,3.0,1.0,2 llama-2-chat:7:ggmlv3:q3_K_M,3.5,4.0,0.875,2 llama-2-chat:7:ggmlv3:q4_K_S,2.5,3.0,0.8333333333333334,2 -llama-2-chat:13:ggmlv3:q6_K,2.5,3.0,0.8333333333333334,2 llama-2-chat:13:ggmlv3:q8_0,2.5,3.0,0.8333333333333334,2 llama-2-chat:13:ggmlv3:q4_K_M,2.5,3.0,0.8333333333333334,2 +llama-2-chat:13:ggmlv3:q6_K,2.5,3.0,0.8333333333333334,2 llama-2-chat:7:ggmlv3:q5_K_M,3.0,4.0,0.75,2 -llama-2-chat:7:ggmlv3:q5_0,2.0,3.0,0.6666666666666666,2 +llama-2-chat:13:ggmlv3:q4_K_S,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,3.0,0.6666666666666666,2 -llama-2-chat:13:ggmlv3:q3_K_M,2.0,3.0,0.6666666666666666,2 -llama-2-chat:13:ggmlv3:q4_K_S,2.0,3.0,0.6666666666666666,2 +llama-2-chat:7:ggmlv3:q5_0,2.0,3.0,0.6666666666666666,2 llama-2-chat:7:ggmlv3:q4_K_M,2.0,3.0,0.6666666666666666,2 +llama-2-chat:13:ggmlv3:q3_K_M,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.5,4.0,0.625,2 llama-2-chat:7:ggmlv3:q4_1,3.0,5.0,0.6,2 diff --git a/benchmark/results/preprocessed_for_frontend/property_selection.csv b/benchmark/results/preprocessed_for_frontend/property_selection.csv index 6855dd0f..2afb3a24 100644 --- a/benchmark/results/preprocessed_for_frontend/property_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/property_selection.csv @@ -1,4 +1,5 @@ Model name,Passed test cases,Total test cases,Score,Iterations +gpt-4,13.0,17.0,0.7647058823529411,2 gpt-3.5-turbo,11.0,17.0,0.6470588235294118,2 llama-2-chat:7:ggmlv3:q2_K,4.0,17.0,0.23529411764705882,2 llama-2-chat:7:ggmlv3:q4_1,0.0,6.0,0.0,2 @@ -12,9 +13,8 @@ llama-2-chat:7:ggmlv3:q5_K_M,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q5_0,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q4_K_S,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q4_K_M,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q6_K,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q5_K_M,0.0,6.0,0.0,2 @@ -24,4 +24,5 @@ llama-2-chat:13:ggmlv3:q4_K_M,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q4_1,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/query_generation.csv b/benchmark/results/preprocessed_for_frontend/query_generation.csv index 7b2c8126..813d416a 100644 --- a/benchmark/results/preprocessed_for_frontend/query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/query_generation.csv @@ -3,6 +3,7 @@ mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,12.0,16.0,0.75,2 +gpt-4,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,11.5,16.0,0.71875,2 llama-2-chat:13:ggmlv3:q5_K_M,11.0,16.0,0.6875,2 llama-2-chat:13:ggmlv3:q8_0,11.0,16.0,0.6875,2 @@ -10,18 +11,18 @@ llama-2-chat:13:ggmlv3:q6_K,11.0,16.0,0.6875,2 gpt-3.5-turbo,11.0,16.0,0.6875,2 llama-2-chat:13:ggmlv3:q5_0,11.0,16.0,0.6875,2 llama-2-chat:13:ggmlv3:q3_K_M,11.0,16.0,0.6875,2 -llama-2-chat:13:ggmlv3:q4_1,11.0,16.0,0.6875,2 llama-2-chat:13:ggmlv3:q4_K_M,11.0,16.0,0.6875,2 -llama-2-chat:7:ggmlv3:q3_K_M,10.5,16.0,0.65625,2 +llama-2-chat:13:ggmlv3:q4_1,11.0,16.0,0.6875,2 llama-2-chat:7:ggmlv3:q6_K,10.5,16.0,0.65625,2 +llama-2-chat:7:ggmlv3:q3_K_M,10.5,16.0,0.65625,2 +llama-2-chat:7:ggmlv3:q4_K_S,10.0,16.0,0.625,2 +llama-2-chat:13:ggmlv3:q4_K_S,10.0,16.0,0.625,2 llama-2-chat:7:ggmlv3:q5_K_M,10.0,16.0,0.625,2 +llama-2-chat:7:ggmlv3:q4_1,10.0,16.0,0.625,2 llama-2-chat:7:ggmlv3:q8_0,10.0,16.0,0.625,2 llama-2-chat:7:ggmlv3:q5_0,10.0,16.0,0.625,2 -llama-2-chat:13:ggmlv3:q4_K_S,10.0,16.0,0.625,2 -llama-2-chat:7:ggmlv3:q4_K_S,10.0,16.0,0.625,2 -llama-2-chat:7:ggmlv3:q4_1,10.0,16.0,0.625,2 llama-2-chat:7:ggmlv3:q4_K_M,9.5,16.0,0.59375,2 -llama-2-chat:13:ggmlv3:q2_K,9.0,16.0,0.5625,2 llama-2-chat:13:ggmlv3:q4_0,9.0,16.0,0.5625,2 +llama-2-chat:13:ggmlv3:q2_K,9.0,16.0,0.5625,2 llama-2-chat:7:ggmlv3:q4_0,8.0,16.0,0.5,2 llama-2-chat:7:ggmlv3:q2_K,7.0,16.0,0.4375,2 diff --git a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv index 0c1dbaeb..a3ef02bf 100644 --- a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv @@ -1,19 +1,23 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,6.0,6.0,1.0,2 +gpt-4,6.0,6.0,1.0,2 llama-2-chat:7:ggmlv3:q5_0,3.0,6.0,0.5,2 llama-2-chat:7:ggmlv3:q3_K_M,3.0,6.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.5,6.0,0.25,2 -llama-2-chat:7:ggmlv3:q4_1,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q8_0,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q6_K,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q5_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q4_K_S,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggmlv3:q4_1,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q4_1,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 +llama-2-chat:13:ggmlv3:q3_K_M,0.0,6.0,0.0,2 llama-2-chat:7:ggmlv3:q2_K,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q6_K,0.0,6.0,0.0,2 @@ -21,7 +25,4 @@ llama-2-chat:13:ggmlv3:q5_K_M,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q5_0,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q4_K_S,0.0,6.0,0.0,2 llama-2-chat:13:ggmlv3:q4_K_M,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_1,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2 diff --git a/docs/scripts/hooks.py b/docs/scripts/hooks.py index a6deac9b..8e9b6a4b 100644 --- a/docs/scripts/hooks.py +++ b/docs/scripts/hooks.py @@ -125,13 +125,21 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]): overview["SD"] = overview.std(axis=1) overview = overview.sort_values(by="Mean", ascending=False) # split "Model name" at : to get Model name, size, version, and quantisation - overview["Model name"] = overview.index - overview[["Model name", "Size", "Version", "Quantisation"]] = overview[ - "Model name" - ].str.split(":", expand=True) + overview.to_csv( + f"{result_files_path}preprocessed_for_frontend/overview.csv", + index=True, + ) + + overview_aggregated = overview + overview_aggregated["Model name"] = overview_aggregated.index + overview_aggregated[["Model name", "Size", "Version", "Quantisation"]] = ( + overview_aggregated["Model name"].str.split(":", expand=True) + ) # convert underscores in Size to commas - overview["Size"] = overview["Size"].str.replace("_", ",") - overview = overview[ + overview_aggregated["Size"] = overview_aggregated["Size"].str.replace( + "_", "," + ) + overview_aggregated = overview_aggregated[ [ "Model name", "Size", @@ -141,14 +149,6 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]): "SD", ] ] - overview.to_csv( - f"{result_files_path}preprocessed_for_frontend/overview.csv", - index=False, - ) - - overview_aggregated = overview[ - ["Model name", "Size", "Quantisation", "Mean", "SD"] - ] # round mean and sd to 2 decimal places overview_aggregated["Mean"] = overview_aggregated["Mean"].round(2) overview_aggregated["SD"] = overview_aggregated["SD"].round(2) From fe9b56ce50f8c39dd4c3186f5355aa467a6cf15f Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Mon, 5 Feb 2024 02:17:41 +0100 Subject: [PATCH 28/32] update xinference to access llama2 70B .. adjust tests for ggufv2 models --- benchmark/conftest.py | 32 +- .../results/end_to_end_query_generation.csv | 80 +- benchmark/results/entity_selection.csv | 81 +- ...explicit_relevance_of_single_fragments.csv | 240 ++--- ...plicit_relevance_of_multiple_fragments.csv | 80 +- .../end_to_end_query_generation.csv | 40 +- .../entity_selection.csv | 40 +- ...explicit_relevance_of_single_fragments.csv | 40 +- ...plicit_relevance_of_multiple_fragments.csv | 40 +- .../overview-aggregated.csv | 40 +- .../preprocessed_for_frontend/overview.csv | 40 +- .../property_exists.csv | 40 +- .../property_selection.csv | 40 +- .../query_generation.csv | 40 +- .../relationship_selection.csv | 40 +- benchmark/results/property_exists.csv | 80 +- benchmark/results/property_selection.csv | 80 +- benchmark/results/query_generation.csv | 80 +- benchmark/results/relationship_selection.csv | 80 +- poetry.lock | 977 +++++++++++------- pyproject.toml | 2 +- 21 files changed, 1196 insertions(+), 1016 deletions(-) diff --git a/benchmark/conftest.py b/benchmark/conftest.py index 7e8522f9..dc803fc7 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -28,24 +28,24 @@ "model_size_in_billions": [ 7, 13, - # 70, + 70, ], - "model_format": "ggmlv3", + "model_format": "ggufv2", "quantization": [ - "q2_K", - # "q3_K_L", - "q3_K_M", - # "q3_K_S", - "q4_0", - "q4_1", - "q4_K_M", - "q4_K_S", - "q5_0", - # "q5_1", - "q5_K_M", - # "q5_K_S", - "q6_K", - "q8_0", + "Q2_K", + # "Q3_K_L", + "Q3_K_M", + # "Q3_K_S", + "Q4_0", + "Q4_1", + "Q4_K_M", + "Q4_K_S", + "Q5_0", + # "Q5_1", + "Q5_K_M", + # "Q5_K_S", + "Q6_K", + "Q8_0", ], }, "mixtral-instruct-v0.1": { diff --git a/benchmark/results/end_to_end_query_generation.csv b/benchmark/results/end_to_end_query_generation.csv index a6fbfff2..975af2b2 100644 --- a/benchmark/results/end_to_end_query_generation.csv +++ b/benchmark/results/end_to_end_query_generation.csv @@ -3,46 +3,46 @@ gpt-3.5-turbo,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec gpt-4,multi_word,5.5/8,2,f29b6faf7d003159d43a5d1cf451587f gpt-4,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q3_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_1,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_1,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_S,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_S,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q6_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q6_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q8_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q8_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q3_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q3_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_1,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_1,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_S,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_S,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q6_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q6_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q8_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q8_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q3_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q3_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_1,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_1,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_S,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_S,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q6_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q6_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q8_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q8_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q3_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q3_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_1,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_1,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_S,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_S,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q6_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q6_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q8_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q8_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/benchmark/results/entity_selection.csv b/benchmark/results/entity_selection.csv index 3cf47398..4de953b5 100644 --- a/benchmark/results/entity_selection.csv +++ b/benchmark/results/entity_selection.csv @@ -3,46 +3,47 @@ gpt-3.5-turbo,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec gpt-4,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f gpt-4,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_0,single_word,1.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_1,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_S,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q6_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q8_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q2_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q2_K,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q3_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q3_K_M,single_word,1.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_0,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_1,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_1,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_M,single_word,1.5/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_S,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_S,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_0,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_K_M,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q6_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q8_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q8_0,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q2_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q2_K,single_word,0.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q3_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_0,single_word,1.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_1,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_S,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q6_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q8_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q2_K,single_word,0.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q2_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q2_K,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q3_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q3_K_M,single_word,1.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_0,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_1,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_1,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_M,single_word,1.5/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_S,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_S,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_0,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_K_M,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q6_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q8_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q8_0,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv index 9a3a604d..79b31719 100644 --- a/benchmark/results/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -11,126 +11,126 @@ gpt-4,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3 gpt-4,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 gpt-4,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 gpt-4,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:13:ggmlv3:q2_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:13:ggmlv3:q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:13:ggmlv3:q2_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:13:ggmlv3:q2_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:13:ggmlv3:q2_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:13:ggmlv3:q2_K,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:13:ggmlv3:q3_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:13:ggmlv3:q3_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:13:ggmlv3:q3_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:13:ggmlv3:q3_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:13:ggmlv3:q3_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:13:ggmlv3:q3_K_M,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:13:ggmlv3:q4_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:13:ggmlv3:q4_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:13:ggmlv3:q4_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:13:ggmlv3:q4_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:13:ggmlv3:q4_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:13:ggmlv3:q4_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:13:ggmlv3:q4_1,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:13:ggmlv3:q4_1,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:13:ggmlv3:q4_1,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:13:ggmlv3:q4_1,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:13:ggmlv3:q4_1,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:13:ggmlv3:q4_1,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:13:ggmlv3:q4_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:13:ggmlv3:q4_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:13:ggmlv3:q4_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:13:ggmlv3:q4_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:13:ggmlv3:q4_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:13:ggmlv3:q4_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:13:ggmlv3:q4_K_S,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:13:ggmlv3:q4_K_S,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:13:ggmlv3:q4_K_S,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:13:ggmlv3:q4_K_S,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:13:ggmlv3:q4_K_S,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:13:ggmlv3:q4_K_S,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:13:ggmlv3:q5_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:13:ggmlv3:q5_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:13:ggmlv3:q5_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:13:ggmlv3:q5_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:13:ggmlv3:q5_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:13:ggmlv3:q5_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:13:ggmlv3:q5_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:13:ggmlv3:q5_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:13:ggmlv3:q5_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:13:ggmlv3:q5_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:13:ggmlv3:q5_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:13:ggmlv3:q5_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:13:ggmlv3:q6_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:13:ggmlv3:q6_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:13:ggmlv3:q6_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:13:ggmlv3:q6_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:13:ggmlv3:q6_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:13:ggmlv3:q6_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:13:ggmlv3:q8_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:13:ggmlv3:q8_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:13:ggmlv3:q8_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:13:ggmlv3:q8_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:13:ggmlv3:q8_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:13:ggmlv3:q8_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:7:ggmlv3:q2_K,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:7:ggmlv3:q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:7:ggmlv3:q2_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:7:ggmlv3:q2_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:7:ggmlv3:q2_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:7:ggmlv3:q2_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:7:ggmlv3:q3_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:7:ggmlv3:q3_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:7:ggmlv3:q3_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:7:ggmlv3:q3_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:7:ggmlv3:q3_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:7:ggmlv3:q3_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:7:ggmlv3:q4_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:7:ggmlv3:q4_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:7:ggmlv3:q4_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:7:ggmlv3:q4_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:7:ggmlv3:q4_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:7:ggmlv3:q4_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:7:ggmlv3:q4_1,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:7:ggmlv3:q4_1,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:7:ggmlv3:q4_1,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:7:ggmlv3:q4_1,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:7:ggmlv3:q4_1,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:7:ggmlv3:q4_1,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:7:ggmlv3:q4_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:7:ggmlv3:q4_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:7:ggmlv3:q4_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:7:ggmlv3:q4_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:7:ggmlv3:q4_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:7:ggmlv3:q4_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:7:ggmlv3:q4_K_S,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:7:ggmlv3:q4_K_S,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:7:ggmlv3:q4_K_S,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:7:ggmlv3:q4_K_S,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:7:ggmlv3:q4_K_S,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:7:ggmlv3:q4_K_S,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:7:ggmlv3:q5_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:7:ggmlv3:q5_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:7:ggmlv3:q5_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:7:ggmlv3:q5_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:7:ggmlv3:q5_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:7:ggmlv3:q5_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:7:ggmlv3:q5_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:7:ggmlv3:q5_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:7:ggmlv3:q5_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:7:ggmlv3:q5_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:7:ggmlv3:q5_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:7:ggmlv3:q5_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:7:ggmlv3:q6_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:7:ggmlv3:q6_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:7:ggmlv3:q6_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:7:ggmlv3:q6_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:7:ggmlv3:q6_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:7:ggmlv3:q6_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 -llama-2-chat:7:ggmlv3:q8_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 -llama-2-chat:7:ggmlv3:q8_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 -llama-2-chat:7:ggmlv3:q8_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 -llama-2-chat:7:ggmlv3:q8_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 -llama-2-chat:7:ggmlv3:q8_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 -llama-2-chat:7:ggmlv3:q8_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggufv2:Q2_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggufv2:Q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggufv2:Q2_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggufv2:Q2_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggufv2:Q2_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggufv2:Q2_K,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggufv2:Q3_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggufv2:Q3_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggufv2:Q3_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggufv2:Q3_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggufv2:Q3_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggufv2:Q3_K_M,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggufv2:Q4_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggufv2:Q4_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggufv2:Q4_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggufv2:Q4_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggufv2:Q4_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggufv2:Q4_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggufv2:Q4_1,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggufv2:Q4_1,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggufv2:Q4_1,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggufv2:Q4_1,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggufv2:Q4_1,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggufv2:Q4_1,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggufv2:Q4_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggufv2:Q4_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggufv2:Q4_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggufv2:Q4_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggufv2:Q4_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggufv2:Q4_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggufv2:Q4_K_S,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggufv2:Q4_K_S,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggufv2:Q4_K_S,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggufv2:Q4_K_S,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggufv2:Q4_K_S,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggufv2:Q4_K_S,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggufv2:Q5_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggufv2:Q5_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggufv2:Q5_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggufv2:Q5_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggufv2:Q5_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggufv2:Q5_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggufv2:Q5_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggufv2:Q5_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggufv2:Q5_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggufv2:Q5_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggufv2:Q5_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggufv2:Q5_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggufv2:Q6_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggufv2:Q6_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggufv2:Q6_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggufv2:Q6_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggufv2:Q6_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggufv2:Q6_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:13:ggufv2:Q8_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:13:ggufv2:Q8_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:13:ggufv2:Q8_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:13:ggufv2:Q8_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:13:ggufv2:Q8_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:13:ggufv2:Q8_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggufv2:Q2_K,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggufv2:Q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggufv2:Q2_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggufv2:Q2_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggufv2:Q2_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggufv2:Q2_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggufv2:Q3_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggufv2:Q3_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggufv2:Q3_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggufv2:Q3_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggufv2:Q3_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggufv2:Q3_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggufv2:Q4_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggufv2:Q4_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggufv2:Q4_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggufv2:Q4_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggufv2:Q4_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggufv2:Q4_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggufv2:Q4_1,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggufv2:Q4_1,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggufv2:Q4_1,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggufv2:Q4_1,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggufv2:Q4_1,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggufv2:Q4_1,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggufv2:Q4_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggufv2:Q4_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggufv2:Q4_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggufv2:Q4_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggufv2:Q4_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggufv2:Q4_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggufv2:Q4_K_S,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggufv2:Q4_K_S,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggufv2:Q4_K_S,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggufv2:Q4_K_S,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggufv2:Q4_K_S,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggufv2:Q4_K_S,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggufv2:Q5_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggufv2:Q5_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggufv2:Q5_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggufv2:Q5_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggufv2:Q5_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggufv2:Q5_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggufv2:Q5_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggufv2:Q5_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggufv2:Q5_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggufv2:Q5_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggufv2:Q5_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggufv2:Q5_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggufv2:Q6_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggufv2:Q6_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggufv2:Q6_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggufv2:Q6_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggufv2:Q6_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggufv2:Q6_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:7:ggufv2:Q8_0,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:7:ggufv2:Q8_0,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:7:ggufv2:Q8_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:7:ggufv2:Q8_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:7:ggufv2:Q8_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:7:ggufv2:Q8_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_relevance_no_more_explicit,0.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv index 43051e43..b2358bac 100644 --- a/benchmark/results/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/implicit_relevance_of_multiple_fragments.csv @@ -3,46 +3,46 @@ gpt-3.5-turbo,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 gpt-3.5-turbo,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 gpt-4,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 gpt-4,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:13:ggmlv3:q2_K,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:13:ggmlv3:q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:13:ggmlv3:q3_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:13:ggmlv3:q3_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:13:ggmlv3:q4_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:13:ggmlv3:q4_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:13:ggmlv3:q4_1,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:13:ggmlv3:q4_1,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:13:ggmlv3:q4_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:13:ggmlv3:q4_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:13:ggmlv3:q4_K_S,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:13:ggmlv3:q4_K_S,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:13:ggmlv3:q5_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:13:ggmlv3:q5_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:13:ggmlv3:q5_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:13:ggmlv3:q5_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:13:ggmlv3:q6_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:13:ggmlv3:q6_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:13:ggmlv3:q8_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:13:ggmlv3:q8_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:7:ggmlv3:q2_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:7:ggmlv3:q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:7:ggmlv3:q3_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:7:ggmlv3:q3_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:7:ggmlv3:q4_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:7:ggmlv3:q4_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:7:ggmlv3:q4_1,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:7:ggmlv3:q4_1,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:7:ggmlv3:q4_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:7:ggmlv3:q4_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:7:ggmlv3:q4_K_S,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:7:ggmlv3:q4_K_S,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:7:ggmlv3:q5_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:7:ggmlv3:q5_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:7:ggmlv3:q5_K_M,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:7:ggmlv3:q5_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:7:ggmlv3:q6_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:7:ggmlv3:q6_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 -llama-2-chat:7:ggmlv3:q8_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 -llama-2-chat:7:ggmlv3:q8_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggufv2:Q2_K,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggufv2:Q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggufv2:Q3_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggufv2:Q3_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggufv2:Q4_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggufv2:Q4_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggufv2:Q4_1,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggufv2:Q4_1,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggufv2:Q4_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggufv2:Q4_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggufv2:Q4_K_S,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggufv2:Q4_K_S,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggufv2:Q5_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggufv2:Q5_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggufv2:Q5_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggufv2:Q5_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggufv2:Q6_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggufv2:Q6_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:13:ggufv2:Q8_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:13:ggufv2:Q8_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggufv2:Q2_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggufv2:Q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggufv2:Q3_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggufv2:Q3_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggufv2:Q4_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggufv2:Q4_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggufv2:Q4_1,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggufv2:Q4_1,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggufv2:Q4_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggufv2:Q4_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggufv2:Q4_K_S,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggufv2:Q4_K_S,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggufv2:Q5_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggufv2:Q5_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggufv2:Q5_K_M,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggufv2:Q5_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggufv2:Q6_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggufv2:Q6_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:7:ggufv2:Q8_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:7:ggufv2:Q8_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 diff --git a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv index ba143461..4bb10cb5 100644 --- a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv @@ -1,28 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-4,11.5,16.0,0.71875,2 gpt-3.5-turbo,11.0,16.0,0.6875,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_0,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,16.0,0.0,2 -llama-2-chat:7:ggmlv3:q8_0,0.0,16.0,0.0,2 -llama-2-chat:7:ggmlv3:q6_K,0.0,16.0,0.0,2 -llama-2-chat:7:ggmlv3:q5_K_M,0.0,16.0,0.0,2 -llama-2-chat:7:ggmlv3:q5_0,0.0,16.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_K_S,0.0,16.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_K_M,0.0,16.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_1,0.0,16.0,0.0,2 -llama-2-chat:7:ggmlv3:q3_K_M,0.0,16.0,0.0,2 -llama-2-chat:7:ggmlv3:q2_K,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q8_0,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q6_K,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_K_M,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_0,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_S,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_M,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_1,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_0,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,16.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q8_0,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q6_K,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q5_K_M,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q5_0,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_K_S,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_K_M,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_1,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q3_K_M,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q2_K,0.0,16.0,0.0,2 +llama-2-chat:13:ggufv2:Q8_0,0.0,16.0,0.0,2 +llama-2-chat:13:ggufv2:Q6_K,0.0,16.0,0.0,2 +llama-2-chat:13:ggufv2:Q5_K_M,0.0,16.0,0.0,2 +llama-2-chat:13:ggufv2:Q5_0,0.0,16.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_K_S,0.0,16.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_K_M,0.0,16.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_1,0.0,16.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_0,0.0,16.0,0.0,2 +llama-2-chat:13:ggufv2:Q3_K_M,0.0,16.0,0.0,2 +llama-2-chat:13:ggufv2:Q2_K,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,16.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/entity_selection.csv b/benchmark/results/preprocessed_for_frontend/entity_selection.csv index bd93b80f..8cb1b2a8 100644 --- a/benchmark/results/preprocessed_for_frontend/entity_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/entity_selection.csv @@ -1,28 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,4.0,4.0,1.0,2 gpt-4,4.0,4.0,1.0,2 -llama-2-chat:7:ggmlv3:q2_K,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q8_0,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q5_K_M,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q5_0,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q4_K_S,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q4_1,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q4_0,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q3_K_M,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q4_K_M,2.5,5.0,0.5,2 +llama-2-chat:7:ggufv2:Q2_K,3.0,5.0,0.6,2 +llama-2-chat:7:ggufv2:Q8_0,3.0,5.0,0.6,2 +llama-2-chat:7:ggufv2:Q5_K_M,3.0,5.0,0.6,2 +llama-2-chat:7:ggufv2:Q5_0,3.0,5.0,0.6,2 +llama-2-chat:7:ggufv2:Q4_K_S,3.0,5.0,0.6,2 +llama-2-chat:7:ggufv2:Q4_1,3.0,5.0,0.6,2 +llama-2-chat:7:ggufv2:Q4_0,3.0,5.0,0.6,2 +llama-2-chat:7:ggufv2:Q3_K_M,3.0,5.0,0.6,2 +llama-2-chat:7:ggufv2:Q4_K_M,2.5,5.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,5.0,0.4,2 -llama-2-chat:13:ggmlv3:q4_0,1.0,5.0,0.2,2 +llama-2-chat:13:ggufv2:Q4_0,1.0,5.0,0.2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,1.0,5.0,0.2,2 -llama-2-chat:7:ggmlv3:q6_K,1.0,5.0,0.2,2 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,5.0,0.0,2 +llama-2-chat:7:ggufv2:Q6_K,1.0,5.0,0.2,2 +llama-2-chat:13:ggufv2:Q3_K_M,0.0,5.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,5.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q6_K,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q8_0,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_1,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_M,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_S,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_0,0.0,5.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_K_M,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q2_K,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q6_K,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q8_0,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_1,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_K_M,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_K_S,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q5_0,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q5_K_M,0.0,5.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,5.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv index 6c2b93eb..73331d63 100644 --- a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv @@ -1,26 +1,26 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,6.0,6.0,1.0,2 -llama-2-chat:13:ggmlv3:q8_0,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q8_0,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q6_K,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q5_K_M,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q5_0,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_K_S,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_K_M,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_1,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_0,6.0,6.0,1.0,2 +llama-2-chat:13:ggufv2:Q8_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q8_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q6_K,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q5_K_M,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q5_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q4_K_S,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q4_K_M,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q4_1,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q4_0,6.0,6.0,1.0,2 gpt-4,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q3_K_M,6.0,6.0,1.0,2 -llama-2-chat:13:ggmlv3:q6_K,6.0,6.0,1.0,2 -llama-2-chat:13:ggmlv3:q5_K_M,6.0,6.0,1.0,2 -llama-2-chat:13:ggmlv3:q5_0,6.0,6.0,1.0,2 -llama-2-chat:13:ggmlv3:q4_K_S,6.0,6.0,1.0,2 -llama-2-chat:13:ggmlv3:q4_K_M,6.0,6.0,1.0,2 -llama-2-chat:13:ggmlv3:q4_1,6.0,6.0,1.0,2 -llama-2-chat:13:ggmlv3:q4_0,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q2_K,5.0,6.0,0.8333333333333334,2 -llama-2-chat:13:ggmlv3:q3_K_M,5.0,6.0,0.8333333333333334,2 -llama-2-chat:13:ggmlv3:q2_K,5.0,6.0,0.8333333333333334,2 +llama-2-chat:7:ggufv2:Q3_K_M,6.0,6.0,1.0,2 +llama-2-chat:13:ggufv2:Q6_K,6.0,6.0,1.0,2 +llama-2-chat:13:ggufv2:Q5_K_M,6.0,6.0,1.0,2 +llama-2-chat:13:ggufv2:Q5_0,6.0,6.0,1.0,2 +llama-2-chat:13:ggufv2:Q4_K_S,6.0,6.0,1.0,2 +llama-2-chat:13:ggufv2:Q4_K_M,6.0,6.0,1.0,2 +llama-2-chat:13:ggufv2:Q4_1,6.0,6.0,1.0,2 +llama-2-chat:13:ggufv2:Q4_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q2_K,5.0,6.0,0.8333333333333334,2 +llama-2-chat:13:ggufv2:Q3_K_M,5.0,6.0,0.8333333333333334,2 +llama-2-chat:13:ggufv2:Q2_K,5.0,6.0,0.8333333333333334,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.5,6.0,0.4166666666666667,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,2.0,6.0,0.3333333333333333,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,1.0,6.0,0.16666666666666666,2 diff --git a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv index aeed7928..b6da22e9 100644 --- a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv @@ -1,28 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,2.0,2.0,1.0,2 gpt-4,2.0,2.0,1.0,2 -llama-2-chat:13:ggmlv3:q2_K,2.0,2.0,1.0,2 +llama-2-chat:13:ggufv2:Q2_K,2.0,2.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,2.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.0,2.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.5,2.0,0.75,2 -llama-2-chat:7:ggmlv3:q5_K_M,1.5,2.0,0.75,2 +llama-2-chat:7:ggufv2:Q5_K_M,1.5,2.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1.5,2.0,0.75,2 -llama-2-chat:13:ggmlv3:q4_K_S,1.5,2.0,0.75,2 -llama-2-chat:13:ggmlv3:q5_0,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q5_0,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q4_K_S,1.5,2.0,0.75,2 +llama-2-chat:13:ggufv2:Q5_0,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q5_0,1.0,2.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.0,2.0,0.5,2 -llama-2-chat:13:ggmlv3:q3_K_M,1.0,2.0,0.5,2 -llama-2-chat:13:ggmlv3:q4_0,1.0,2.0,0.5,2 -llama-2-chat:13:ggmlv3:q4_1,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q8_0,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q6_K,1.0,2.0,0.5,2 -llama-2-chat:13:ggmlv3:q4_K_M,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q4_K_S,1.0,2.0,0.5,2 -llama-2-chat:13:ggmlv3:q5_K_M,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q4_K_M,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q4_1,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q4_0,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q2_K,1.0,2.0,0.5,2 -llama-2-chat:13:ggmlv3:q8_0,1.0,2.0,0.5,2 -llama-2-chat:13:ggmlv3:q6_K,1.0,2.0,0.5,2 -llama-2-chat:7:ggmlv3:q3_K_M,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q3_K_M,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q4_0,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q4_1,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q8_0,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q6_K,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q4_K_M,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q4_K_S,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q5_K_M,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q4_K_M,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q4_1,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q4_0,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q2_K,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q8_0,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q6_K,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q3_K_M,1.0,2.0,0.5,2 diff --git a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv index e841ef0b..0ca0cf15 100644 --- a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv +++ b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv @@ -1,28 +1,28 @@ Model name,Size,Version,Quantisation,Mean,SD gpt-4,,,,0.9,0.12 gpt-3.5-turbo,,,,0.88,0.16 -llama-2-chat,7,ggmlv3,q3_K_M,0.52,0.34 -llama-2-chat,7,ggmlv3,q5_0,0.49,0.32 -llama-2-chat,7,ggmlv3,q5_K_M,0.47,0.38 -llama-2-chat,13,ggmlv3,q5_0,0.46,0.53 -llama-2-chat,7,ggmlv3,q4_0,0.45,0.39 -llama-2-chat,7,ggmlv3,q4_K_S,0.44,0.37 -llama-2-chat,13,ggmlv3,q4_1,0.43,0.48 -llama-2-chat,7,ggmlv3,q4_1,0.42,0.35 -llama-2-chat,13,ggmlv3,q4_0,0.41,0.4 -llama-2-chat,7,ggmlv3,q4_K_M,0.41,0.35 -llama-2-chat,7,ggmlv3,q8_0,0.4,0.34 -llama-2-chat,13,ggmlv3,q5_K_M,0.4,0.43 -llama-2-chat,13,ggmlv3,q4_K_S,0.38,0.39 -llama-2-chat,13,ggmlv3,q8_0,0.38,0.4 -llama-2-chat,13,ggmlv3,q6_K,0.38,0.4 -llama-2-chat,13,ggmlv3,q4_K_M,0.38,0.4 -llama-2-chat,7,ggmlv3,q6_K,0.36,0.34 +llama-2-chat,7,ggufv2,q3_K_M,0.52,0.34 +llama-2-chat,7,ggufv2,q5_0,0.49,0.32 +llama-2-chat,7,ggufv2,q5_K_M,0.47,0.38 +llama-2-chat,13,ggufv2,q5_0,0.46,0.53 +llama-2-chat,7,ggufv2,q4_0,0.45,0.39 +llama-2-chat,7,ggufv2,q4_K_S,0.44,0.37 +llama-2-chat,13,ggufv2,q4_1,0.43,0.48 +llama-2-chat,7,ggufv2,q4_1,0.42,0.35 +llama-2-chat,13,ggufv2,q4_0,0.41,0.4 +llama-2-chat,7,ggufv2,q4_K_M,0.41,0.35 +llama-2-chat,7,ggufv2,q8_0,0.4,0.34 +llama-2-chat,13,ggufv2,q5_K_M,0.4,0.43 +llama-2-chat,13,ggufv2,q4_K_S,0.38,0.39 +llama-2-chat,13,ggufv2,q8_0,0.38,0.4 +llama-2-chat,13,ggufv2,q6_K,0.38,0.4 +llama-2-chat,13,ggufv2,q4_K_M,0.38,0.4 +llama-2-chat,7,ggufv2,q6_K,0.36,0.34 mixtral-instruct-v0.1,"46,7",ggufv2,Q2_K,0.35,0.39 mixtral-instruct-v0.1,"46,7",ggufv2,Q4_0,0.35,0.38 mixtral-instruct-v0.1,"46,7",ggufv2,Q4_K_M,0.35,0.37 -llama-2-chat,13,ggmlv3,q3_K_M,0.34,0.35 +llama-2-chat,13,ggufv2,q3_K_M,0.34,0.35 mixtral-instruct-v0.1,"46,7",ggufv2,Q5_0,0.33,0.28 -llama-2-chat,7,ggmlv3,q2_K,0.33,0.3 -llama-2-chat,13,ggmlv3,q2_K,0.3,0.4 +llama-2-chat,7,ggufv2,q2_K,0.33,0.3 +llama-2-chat,13,ggufv2,q2_K,0.3,0.4 mixtral-instruct-v0.1,"46,7",ggufv2,Q8_0,0.28,0.34 diff --git a/benchmark/results/preprocessed_for_frontend/overview.csv b/benchmark/results/preprocessed_for_frontend/overview.csv index 58cd4ccb..cc56c684 100644 --- a/benchmark/results/preprocessed_for_frontend/overview.csv +++ b/benchmark/results/preprocessed_for_frontend/overview.csv @@ -1,28 +1,28 @@ Model name,property_selection,query_generation,explicit_relevance_of_single_fragments,entity_selection,relationship_selection,end_to_end_query_generation,implicit_relevance_of_multiple_fragments,property_exists,Mean,SD gpt-4,0.7647058823529411,0.75,1.0,1.0,1.0,0.71875,1.0,1.0,0.9041819852941176,0.12425585940001706 gpt-3.5-turbo,0.6470588235294118,0.6875,1.0,1.0,1.0,0.6875,1.0,1.0,0.8777573529411764,0.1582457961403944 -llama-2-chat:7:ggmlv3:q3_K_M,0.0,0.65625,1.0,0.6,0.5,0.0,0.5,0.875,0.51640625,0.3396479320906245 -llama-2-chat:7:ggmlv3:q5_0,0.0,0.625,1.0,0.6,0.5,0.0,0.5,0.6666666666666666,0.4864583333333333,0.3166101237896715 -llama-2-chat:7:ggmlv3:q5_K_M,0.0,0.625,1.0,0.6,0.0,0.0,0.75,0.75,0.465625,0.37768569522156914 -llama-2-chat:13:ggmlv3:q5_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.5,0.4609375,0.5330278157317402 -llama-2-chat:7:ggmlv3:q4_0,0.0,0.5,1.0,0.6,0.0,0.0,0.5,1.0,0.45,0.39370039370059057 -llama-2-chat:7:ggmlv3:q4_K_S,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.8333333333333334,0.4447916666666667,0.37288611373909386 -llama-2-chat:13:ggmlv3:q4_1,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.25,0.4296875,0.4754084387069186 -llama-2-chat:7:ggmlv3:q4_1,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.6,0.415625,0.34976275441361676 -llama-2-chat:13:ggmlv3:q4_0,0.0,0.5625,1.0,0.2,0.0,0.0,0.5,1.0,0.4078125,0.3993616732909531 -llama-2-chat:7:ggmlv3:q4_K_M,0.0,0.59375,1.0,0.5,0.0,0.0,0.5,0.6666666666666666,0.4075520833333333,0.347885380173866 -llama-2-chat:7:ggmlv3:q8_0,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.5,0.403125,0.3447003907381017 -llama-2-chat:13:ggmlv3:q5_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.0,0.3984375,0.4261215669779576 -llama-2-chat:13:ggmlv3:q4_K_S,0.0,0.625,1.0,0.0,0.0,0.0,0.75,0.6666666666666666,0.3802083333333333,0.3938753658448881 -llama-2-chat:13:ggmlv3:q8_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 -llama-2-chat:13:ggmlv3:q6_K,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 -llama-2-chat:13:ggmlv3:q4_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 -llama-2-chat:7:ggmlv3:q6_K,0.0,0.65625,1.0,0.2,0.0,0.0,0.5,0.5,0.35703125,0.3447632438876533 +llama-2-chat:7:ggufv2:Q3_K_M,0.0,0.65625,1.0,0.6,0.5,0.0,0.5,0.875,0.51640625,0.3396479320906245 +llama-2-chat:7:ggufv2:Q5_0,0.0,0.625,1.0,0.6,0.5,0.0,0.5,0.6666666666666666,0.4864583333333333,0.3166101237896715 +llama-2-chat:7:ggufv2:Q5_K_M,0.0,0.625,1.0,0.6,0.0,0.0,0.75,0.75,0.465625,0.37768569522156914 +llama-2-chat:13:ggufv2:Q5_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.5,0.4609375,0.5330278157317402 +llama-2-chat:7:ggufv2:Q4_0,0.0,0.5,1.0,0.6,0.0,0.0,0.5,1.0,0.45,0.39370039370059057 +llama-2-chat:7:ggufv2:Q4_K_S,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.8333333333333334,0.4447916666666667,0.37288611373909386 +llama-2-chat:13:ggufv2:Q4_1,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.25,0.4296875,0.4754084387069186 +llama-2-chat:7:ggufv2:Q4_1,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.6,0.415625,0.34976275441361676 +llama-2-chat:13:ggufv2:Q4_0,0.0,0.5625,1.0,0.2,0.0,0.0,0.5,1.0,0.4078125,0.3993616732909531 +llama-2-chat:7:ggufv2:Q4_K_M,0.0,0.59375,1.0,0.5,0.0,0.0,0.5,0.6666666666666666,0.4075520833333333,0.347885380173866 +llama-2-chat:7:ggufv2:Q8_0,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.5,0.403125,0.3447003907381017 +llama-2-chat:13:ggufv2:Q5_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.0,0.3984375,0.4261215669779576 +llama-2-chat:13:ggufv2:Q4_K_S,0.0,0.625,1.0,0.0,0.0,0.0,0.75,0.6666666666666666,0.3802083333333333,0.3938753658448881 +llama-2-chat:13:ggufv2:Q8_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 +llama-2-chat:13:ggufv2:Q6_K,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 +llama-2-chat:13:ggufv2:Q4_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 +llama-2-chat:7:ggufv2:Q6_K,0.0,0.65625,1.0,0.2,0.0,0.0,0.5,0.5,0.35703125,0.3447632438876533 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,0.71875,0.3333333333333333,0.0,0.0,0.0,0.75,1.0,0.35026041666666663,0.3887084057720928 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,0.75,0.4166666666666667,0.0,0.0,0.0,1.0,0.625,0.34895833333333337,0.379565666895876 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,0.75,0.1666666666666666,0.2,0.0,0.0,1.0,0.6666666666666666,0.34791666666666665,0.3718793767249447 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,0.6875,0.8333333333333334,0.0,0.0,0.0,0.5,0.6666666666666666,0.3359375,0.34617790000931764 +llama-2-chat:13:ggufv2:Q3_K_M,0.0,0.6875,0.8333333333333334,0.0,0.0,0.0,0.5,0.6666666666666666,0.3359375,0.34617790000931764 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,0.75,0.0833333333333333,0.4,0.25,0.0,0.5,0.6666666666666666,0.33125,0.2758116179770372 -llama-2-chat:7:ggmlv3:q2_K,0.2352941176470588,0.4375,0.8333333333333334,0.6,0.0,0.0,0.5,0.0,0.325765931372549,0.29627404541647706 -llama-2-chat:13:ggmlv3:q2_K,0.0,0.5625,0.8333333333333334,0.0,0.0,0.0,1.0,0.0,0.2994791666666667,0.4020802973762759 +llama-2-chat:7:ggufv2:Q2_K,0.2352941176470588,0.4375,0.8333333333333334,0.6,0.0,0.0,0.5,0.0,0.325765931372549,0.29627404541647706 +llama-2-chat:13:ggufv2:Q2_K,0.0,0.5625,0.8333333333333334,0.0,0.0,0.0,1.0,0.0,0.2994791666666667,0.4020802973762759 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,0.75,0.0833333333333333,0.0,0.0,0.0,0.75,0.6666666666666666,0.28125,0.3434341983715528 diff --git a/benchmark/results/preprocessed_for_frontend/property_exists.csv b/benchmark/results/preprocessed_for_frontend/property_exists.csv index 792b2e9a..27db2be5 100644 --- a/benchmark/results/preprocessed_for_frontend/property_exists.csv +++ b/benchmark/results/preprocessed_for_frontend/property_exists.csv @@ -1,28 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations -llama-2-chat:13:ggmlv3:q5_0,1.5,1.0,1.5,2 -llama-2-chat:13:ggmlv3:q4_1,2.5,2.0,1.25,2 +llama-2-chat:13:ggufv2:Q5_0,1.5,1.0,1.5,2 +llama-2-chat:13:ggufv2:Q4_1,2.5,2.0,1.25,2 gpt-3.5-turbo,4.0,4.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,2.0,2.0,1.0,2 -llama-2-chat:7:ggmlv3:q4_0,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q4_0,1.0,1.0,1.0,2 +llama-2-chat:7:ggufv2:Q4_0,4.0,4.0,1.0,2 +llama-2-chat:13:ggufv2:Q4_0,1.0,1.0,1.0,2 gpt-4,4.0,4.0,1.0,2 -llama-2-chat:13:ggmlv3:q5_K_M,3.0,3.0,1.0,2 -llama-2-chat:7:ggmlv3:q3_K_M,3.5,4.0,0.875,2 -llama-2-chat:7:ggmlv3:q4_K_S,2.5,3.0,0.8333333333333334,2 -llama-2-chat:13:ggmlv3:q8_0,2.5,3.0,0.8333333333333334,2 -llama-2-chat:13:ggmlv3:q4_K_M,2.5,3.0,0.8333333333333334,2 -llama-2-chat:13:ggmlv3:q6_K,2.5,3.0,0.8333333333333334,2 -llama-2-chat:7:ggmlv3:q5_K_M,3.0,4.0,0.75,2 -llama-2-chat:13:ggmlv3:q4_K_S,2.0,3.0,0.6666666666666666,2 +llama-2-chat:13:ggufv2:Q5_K_M,3.0,3.0,1.0,2 +llama-2-chat:7:ggufv2:Q3_K_M,3.5,4.0,0.875,2 +llama-2-chat:7:ggufv2:Q4_K_S,2.5,3.0,0.8333333333333334,2 +llama-2-chat:13:ggufv2:Q8_0,2.5,3.0,0.8333333333333334,2 +llama-2-chat:13:ggufv2:Q4_K_M,2.5,3.0,0.8333333333333334,2 +llama-2-chat:13:ggufv2:Q6_K,2.5,3.0,0.8333333333333334,2 +llama-2-chat:7:ggufv2:Q5_K_M,3.0,4.0,0.75,2 +llama-2-chat:13:ggufv2:Q4_K_S,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,3.0,0.6666666666666666,2 -llama-2-chat:7:ggmlv3:q5_0,2.0,3.0,0.6666666666666666,2 -llama-2-chat:7:ggmlv3:q4_K_M,2.0,3.0,0.6666666666666666,2 -llama-2-chat:13:ggmlv3:q3_K_M,2.0,3.0,0.6666666666666666,2 +llama-2-chat:7:ggufv2:Q5_0,2.0,3.0,0.6666666666666666,2 +llama-2-chat:7:ggufv2:Q4_K_M,2.0,3.0,0.6666666666666666,2 +llama-2-chat:13:ggufv2:Q3_K_M,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.0,3.0,0.6666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.5,4.0,0.625,2 -llama-2-chat:7:ggmlv3:q4_1,3.0,5.0,0.6,2 -llama-2-chat:7:ggmlv3:q6_K,2.0,4.0,0.5,2 -llama-2-chat:7:ggmlv3:q8_0,2.5,5.0,0.5,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,0.0,0.0,2 -llama-2-chat:7:ggmlv3:q2_K,1.0,0.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_1,3.0,5.0,0.6,2 +llama-2-chat:7:ggufv2:Q6_K,2.0,4.0,0.5,2 +llama-2-chat:7:ggufv2:Q8_0,2.5,5.0,0.5,2 +llama-2-chat:13:ggufv2:Q2_K,0.0,0.0,0.0,2 +llama-2-chat:7:ggufv2:Q2_K,1.0,0.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/property_selection.csv b/benchmark/results/preprocessed_for_frontend/property_selection.csv index 2afb3a24..bf109ef4 100644 --- a/benchmark/results/preprocessed_for_frontend/property_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/property_selection.csv @@ -1,28 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-4,13.0,17.0,0.7647058823529411,2 gpt-3.5-turbo,11.0,17.0,0.6470588235294118,2 -llama-2-chat:7:ggmlv3:q2_K,4.0,17.0,0.23529411764705882,2 -llama-2-chat:7:ggmlv3:q4_1,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q2_K,4.0,17.0,0.23529411764705882,2 +llama-2-chat:7:ggufv2:Q4_1,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q8_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q6_K,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q5_K_M,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q5_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_K_S,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_K_M,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q3_K_M,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q6_K,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_K_M,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_S,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_M,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_1,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q8_0,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q6_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q5_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q5_0,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_K_S,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q8_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q6_K,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q5_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q5_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_K_S,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_1,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q2_K,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/query_generation.csv b/benchmark/results/preprocessed_for_frontend/query_generation.csv index 813d416a..3600570e 100644 --- a/benchmark/results/preprocessed_for_frontend/query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/query_generation.csv @@ -5,24 +5,24 @@ mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,12.0,16.0,0.75,2 gpt-4,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,11.5,16.0,0.71875,2 -llama-2-chat:13:ggmlv3:q5_K_M,11.0,16.0,0.6875,2 -llama-2-chat:13:ggmlv3:q8_0,11.0,16.0,0.6875,2 -llama-2-chat:13:ggmlv3:q6_K,11.0,16.0,0.6875,2 +llama-2-chat:13:ggufv2:Q5_K_M,11.0,16.0,0.6875,2 +llama-2-chat:13:ggufv2:Q8_0,11.0,16.0,0.6875,2 +llama-2-chat:13:ggufv2:Q6_K,11.0,16.0,0.6875,2 gpt-3.5-turbo,11.0,16.0,0.6875,2 -llama-2-chat:13:ggmlv3:q5_0,11.0,16.0,0.6875,2 -llama-2-chat:13:ggmlv3:q3_K_M,11.0,16.0,0.6875,2 -llama-2-chat:13:ggmlv3:q4_K_M,11.0,16.0,0.6875,2 -llama-2-chat:13:ggmlv3:q4_1,11.0,16.0,0.6875,2 -llama-2-chat:7:ggmlv3:q6_K,10.5,16.0,0.65625,2 -llama-2-chat:7:ggmlv3:q3_K_M,10.5,16.0,0.65625,2 -llama-2-chat:7:ggmlv3:q4_K_S,10.0,16.0,0.625,2 -llama-2-chat:13:ggmlv3:q4_K_S,10.0,16.0,0.625,2 -llama-2-chat:7:ggmlv3:q5_K_M,10.0,16.0,0.625,2 -llama-2-chat:7:ggmlv3:q4_1,10.0,16.0,0.625,2 -llama-2-chat:7:ggmlv3:q8_0,10.0,16.0,0.625,2 -llama-2-chat:7:ggmlv3:q5_0,10.0,16.0,0.625,2 -llama-2-chat:7:ggmlv3:q4_K_M,9.5,16.0,0.59375,2 -llama-2-chat:13:ggmlv3:q4_0,9.0,16.0,0.5625,2 -llama-2-chat:13:ggmlv3:q2_K,9.0,16.0,0.5625,2 -llama-2-chat:7:ggmlv3:q4_0,8.0,16.0,0.5,2 -llama-2-chat:7:ggmlv3:q2_K,7.0,16.0,0.4375,2 +llama-2-chat:13:ggufv2:Q5_0,11.0,16.0,0.6875,2 +llama-2-chat:13:ggufv2:Q3_K_M,11.0,16.0,0.6875,2 +llama-2-chat:13:ggufv2:Q4_K_M,11.0,16.0,0.6875,2 +llama-2-chat:13:ggufv2:Q4_1,11.0,16.0,0.6875,2 +llama-2-chat:7:ggufv2:Q6_K,10.5,16.0,0.65625,2 +llama-2-chat:7:ggufv2:Q3_K_M,10.5,16.0,0.65625,2 +llama-2-chat:7:ggufv2:Q4_K_S,10.0,16.0,0.625,2 +llama-2-chat:13:ggufv2:Q4_K_S,10.0,16.0,0.625,2 +llama-2-chat:7:ggufv2:Q5_K_M,10.0,16.0,0.625,2 +llama-2-chat:7:ggufv2:Q4_1,10.0,16.0,0.625,2 +llama-2-chat:7:ggufv2:Q8_0,10.0,16.0,0.625,2 +llama-2-chat:7:ggufv2:Q5_0,10.0,16.0,0.625,2 +llama-2-chat:7:ggufv2:Q4_K_M,9.5,16.0,0.59375,2 +llama-2-chat:13:ggufv2:Q4_0,9.0,16.0,0.5625,2 +llama-2-chat:13:ggufv2:Q2_K,9.0,16.0,0.5625,2 +llama-2-chat:7:ggufv2:Q4_0,8.0,16.0,0.5,2 +llama-2-chat:7:ggufv2:Q2_K,7.0,16.0,0.4375,2 diff --git a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv index a3ef02bf..1f953cb7 100644 --- a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv @@ -1,28 +1,28 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,6.0,6.0,1.0,2 gpt-4,6.0,6.0,1.0,2 -llama-2-chat:7:ggmlv3:q5_0,3.0,6.0,0.5,2 -llama-2-chat:7:ggmlv3:q3_K_M,3.0,6.0,0.5,2 +llama-2-chat:7:ggufv2:Q5_0,3.0,6.0,0.5,2 +llama-2-chat:7:ggufv2:Q3_K_M,3.0,6.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.5,6.0,0.25,2 -llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_0,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q8_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q6_K,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q5_K_M,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_K_S,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_1,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_1,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q3_K_M,0.0,6.0,0.0,2 -llama-2-chat:7:ggmlv3:q2_K,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q6_K,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_K_M,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q5_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_S,0.0,6.0,0.0,2 -llama-2-chat:13:ggmlv3:q4_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q8_0,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q6_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q5_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q2_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_K_S,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_1,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_1,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q2_K,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q8_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q6_K,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q5_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q5_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_K_S,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2 diff --git a/benchmark/results/property_exists.csv b/benchmark/results/property_exists.csv index 2d6f1eef..802e8598 100644 --- a/benchmark/results/property_exists.csv +++ b/benchmark/results/property_exists.csv @@ -3,46 +3,46 @@ gpt-3.5-turbo,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec gpt-4,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f gpt-4,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/0,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/0,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q3_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q3_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_0,multi_word,0.0/0,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_1,multi_word,1.5/1,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_1,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_M,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_S,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_S,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_0,multi_word,0.5/0,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q6_K,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q6_K,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q8_0,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q8_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q2_K,multi_word,1.0/0,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q2_K,single_word,0.0/0,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q3_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q3_K_M,single_word,1.5/2,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_0,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_0,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_1,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_1,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_S,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_S,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_K_M,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_K_M,single_word,1.5/2,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q6_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q6_K,single_word,1.0/2,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q8_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q8_0,single_word,1.5/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q2_K,multi_word,0.0/0,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q2_K,single_word,0.0/0,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q3_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q3_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_0,multi_word,0.0/0,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_1,multi_word,1.5/1,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_1,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_M,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_S,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_S,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_0,multi_word,0.5/0,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q6_K,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q6_K,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q8_0,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q8_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q2_K,multi_word,1.0/0,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q2_K,single_word,0.0/0,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q3_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q3_K_M,single_word,1.5/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_0,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_0,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_1,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_1,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_S,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_S,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_K_M,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_K_M,single_word,1.5/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q6_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q6_K,single_word,1.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q8_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q8_0,single_word,1.5/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,1.0/1,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/benchmark/results/property_selection.csv b/benchmark/results/property_selection.csv index 58b71a5a..e0da4238 100644 --- a/benchmark/results/property_selection.csv +++ b/benchmark/results/property_selection.csv @@ -3,46 +3,46 @@ gpt-3.5-turbo,multi_word,5.0/7,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,6.0/10,2,e447d738f5e035cde60d624eadb79fec gpt-4,multi_word,7.0/7,2,f29b6faf7d003159d43a5d1cf451587f gpt-4,single_word,6.0/10,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q2_K,multi_word,2.0/7,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q2_K,single_word,2.0/10,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q2_K,multi_word,2.0/7,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q2_K,single_word,2.0/10,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/benchmark/results/query_generation.csv b/benchmark/results/query_generation.csv index 4bfc3407..aec1c610 100644 --- a/benchmark/results/query_generation.csv +++ b/benchmark/results/query_generation.csv @@ -3,46 +3,46 @@ gpt-3.5-turbo,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec gpt-4,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f gpt-4,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q2_K,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q2_K,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q3_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q3_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_0,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_1,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_1,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_S,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_S,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_0,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_0,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q6_K,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q6_K,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q8_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q8_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q2_K,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q2_K,single_word,3.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q3_K_M,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q3_K_M,single_word,5.5/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_0,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_0,single_word,4.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_1,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_1,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_M,multi_word,5.5/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_M,single_word,4.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_S,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_S,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_0,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_K_M,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q6_K,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q6_K,single_word,5.5/8,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q8_0,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q8_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q2_K,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q2_K,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q3_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q3_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_0,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_1,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_1,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_S,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_S,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_0,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_0,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q6_K,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q6_K,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q8_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q8_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q2_K,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q2_K,single_word,3.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q3_K_M,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q3_K_M,single_word,5.5/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_0,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_0,single_word,4.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_1,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_1,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_M,multi_word,5.5/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_M,single_word,4.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_S,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_S,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_0,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_K_M,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_K_M,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q6_K,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q6_K,single_word,5.5/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q8_0,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q8_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,5.5/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/benchmark/results/relationship_selection.csv b/benchmark/results/relationship_selection.csv index 162e861b..7704e2ef 100644 --- a/benchmark/results/relationship_selection.csv +++ b/benchmark/results/relationship_selection.csv @@ -3,46 +3,46 @@ gpt-3.5-turbo,multi_word,3.0/3,2,f29b6faf7d003159d43a5d1cf451587f gpt-3.5-turbo,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec gpt-4,multi_word,3.0/3,2,f29b6faf7d003159d43a5d1cf451587f gpt-4,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:13:ggmlv3:q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:13:ggmlv3:q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q3_K_M,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_0,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec -llama-2-chat:7:ggmlv3:q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f -llama-2-chat:7:ggmlv3:q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:13:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:13:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q3_K_M,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_1,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_1,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q4_K_S,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q4_K_S,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_0,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:7:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:7:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f diff --git a/poetry.lock b/poetry.lock index 42c1efa4..1ff3653a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -13,24 +13,24 @@ files = [ [[package]] name = "aiobotocore" -version = "2.11.1" +version = "2.11.2" description = "Async client for aws services using botocore and aiohttp" optional = true python-versions = ">=3.8" files = [ - {file = "aiobotocore-2.11.1-py3-none-any.whl", hash = "sha256:904a7ad7cc8671d662cfd596906dafe839118ea2a66332c37908e3dcfdee1e45"}, - {file = "aiobotocore-2.11.1.tar.gz", hash = "sha256:0b095af50da2d6f94e93ca959e2a4876f0f0d84d534b61b21d8e050832d04ab6"}, + {file = "aiobotocore-2.11.2-py3-none-any.whl", hash = "sha256:487fede588040bfa3a43df945275c28c1c73ca75bf705295adb9fbadd2e89be7"}, + {file = "aiobotocore-2.11.2.tar.gz", hash = "sha256:6dd7352248e3523019c5a54a395d2b1c31080697fc80a9ad2672de4eec8c7abd"}, ] [package.dependencies] aiohttp = ">=3.7.4.post0,<4.0.0" aioitertools = ">=0.5.1,<1.0.0" -botocore = ">=1.33.2,<1.34.28" +botocore = ">=1.33.2,<1.34.35" wrapt = ">=1.10.10,<2.0.0" [package.extras] -awscli = ["awscli (>=1.31.2,<1.32.28)"] -boto3 = ["boto3 (>=1.33.2,<1.34.28)"] +awscli = ["awscli (>=1.31.2,<1.32.35)"] +boto3 = ["boto3 (>=1.33.2,<1.34.35)"] [[package]] name = "aiofiles" @@ -150,6 +150,26 @@ files = [ {file = "aioitertools-0.11.0.tar.gz", hash = "sha256:42c68b8dd3a69c2bf7f2233bf7df4bb58b557bca5252ac02ed5187bbc67d6831"}, ] +[[package]] +name = "aioprometheus" +version = "23.12.0" +description = "A Prometheus Python client library for asyncio-based applications" +optional = true +python-versions = ">=3.8.0" +files = [ + {file = "aioprometheus-23.12.0-py3-none-any.whl", hash = "sha256:b1a77259131153ef820b494e76439b278434eaf2a5e25dc0b8bf3d835f455960"}, +] + +[package.dependencies] +orjson = "*" +quantile-python = ">=1.1" +starlette = {version = ">=0.14.2", optional = true, markers = "extra == \"starlette\""} + +[package.extras] +aiohttp = ["aiohttp (>=3.3.2)"] +quart = ["quart (>=0.15.1)"] +starlette = ["starlette (>=0.14.2)"] + [[package]] name = "aiosignal" version = "1.3.1" @@ -166,17 +186,16 @@ frozenlist = ">=1.1.0" [[package]] name = "aliyun-python-sdk-core" -version = "2.14.0" +version = "2.11.5" description = "The core module of Aliyun Python SDK." optional = true python-versions = "*" files = [ - {file = "aliyun-python-sdk-core-2.14.0.tar.gz", hash = "sha256:c806815a48ffdb894cc5bce15b8259b9a3012cc0cda01be2f3dfbb844f3f4f21"}, + {file = "aliyun-python-sdk-core-2.11.5.tar.gz", hash = "sha256:577265c630c02207c692ca19958bd21665d56208306a834d0885e7770553975e"}, ] [package.dependencies] -cryptography = ">=2.6.0" -jmespath = ">=0.9.3,<1.0.0" +pycryptodome = ">=3.4.7" [[package]] name = "aliyun-python-sdk-kms" @@ -281,6 +300,46 @@ files = [ [package.extras] dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"] +[[package]] +name = "bcrypt" +version = "4.1.2" +description = "Modern password hashing for your software and your servers" +optional = true +python-versions = ">=3.7" +files = [ + {file = "bcrypt-4.1.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:ac621c093edb28200728a9cca214d7e838529e557027ef0581685909acd28b5e"}, + {file = "bcrypt-4.1.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea505c97a5c465ab8c3ba75c0805a102ce526695cd6818c6de3b1a38f6f60da1"}, + {file = "bcrypt-4.1.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:57fa9442758da926ed33a91644649d3e340a71e2d0a5a8de064fb621fd5a3326"}, + {file = "bcrypt-4.1.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eb3bd3321517916696233b5e0c67fd7d6281f0ef48e66812db35fc963a422a1c"}, + {file = "bcrypt-4.1.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6cad43d8c63f34b26aef462b6f5e44fdcf9860b723d2453b5d391258c4c8e966"}, + {file = "bcrypt-4.1.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:44290ccc827d3a24604f2c8bcd00d0da349e336e6503656cb8192133e27335e2"}, + {file = "bcrypt-4.1.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:732b3920a08eacf12f93e6b04ea276c489f1c8fb49344f564cca2adb663b3e4c"}, + {file = "bcrypt-4.1.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:1c28973decf4e0e69cee78c68e30a523be441972c826703bb93099868a8ff5b5"}, + {file = "bcrypt-4.1.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b8df79979c5bae07f1db22dcc49cc5bccf08a0380ca5c6f391cbb5790355c0b0"}, + {file = "bcrypt-4.1.2-cp37-abi3-win32.whl", hash = "sha256:fbe188b878313d01b7718390f31528be4010fed1faa798c5a1d0469c9c48c369"}, + {file = "bcrypt-4.1.2-cp37-abi3-win_amd64.whl", hash = "sha256:9800ae5bd5077b13725e2e3934aa3c9c37e49d3ea3d06318010aa40f54c63551"}, + {file = "bcrypt-4.1.2-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:71b8be82bc46cedd61a9f4ccb6c1a493211d031415a34adde3669ee1b0afbb63"}, + {file = "bcrypt-4.1.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e3c6642077b0c8092580c819c1684161262b2e30c4f45deb000c38947bf483"}, + {file = "bcrypt-4.1.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:387e7e1af9a4dd636b9505a465032f2f5cb8e61ba1120e79a0e1cd0b512f3dfc"}, + {file = "bcrypt-4.1.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f70d9c61f9c4ca7d57f3bfe88a5ccf62546ffbadf3681bb1e268d9d2e41c91a7"}, + {file = "bcrypt-4.1.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2a298db2a8ab20056120b45e86c00a0a5eb50ec4075b6142db35f593b97cb3fb"}, + {file = "bcrypt-4.1.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:ba55e40de38a24e2d78d34c2d36d6e864f93e0d79d0b6ce915e4335aa81d01b1"}, + {file = "bcrypt-4.1.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:3566a88234e8de2ccae31968127b0ecccbb4cddb629da744165db72b58d88ca4"}, + {file = "bcrypt-4.1.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b90e216dc36864ae7132cb151ffe95155a37a14e0de3a8f64b49655dd959ff9c"}, + {file = "bcrypt-4.1.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:69057b9fc5093ea1ab00dd24ede891f3e5e65bee040395fb1e66ee196f9c9b4a"}, + {file = "bcrypt-4.1.2-cp39-abi3-win32.whl", hash = "sha256:02d9ef8915f72dd6daaef40e0baeef8a017ce624369f09754baf32bb32dba25f"}, + {file = "bcrypt-4.1.2-cp39-abi3-win_amd64.whl", hash = "sha256:be3ab1071662f6065899fe08428e45c16aa36e28bc42921c4901a191fda6ee42"}, + {file = "bcrypt-4.1.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d75fc8cd0ba23f97bae88a6ec04e9e5351ff3c6ad06f38fe32ba50cbd0d11946"}, + {file = "bcrypt-4.1.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:a97e07e83e3262599434816f631cc4c7ca2aa8e9c072c1b1a7fec2ae809a1d2d"}, + {file = "bcrypt-4.1.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e51c42750b7585cee7892c2614be0d14107fad9581d1738d954a262556dd1aab"}, + {file = "bcrypt-4.1.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba4e4cc26610581a6329b3937e02d319f5ad4b85b074846bf4fef8a8cf51e7bb"}, + {file = "bcrypt-4.1.2.tar.gz", hash = "sha256:33313a1200a3ae90b75587ceac502b048b840fc69e7f7a0905b5f87fac7a1258"}, +] + +[package.extras] +tests = ["pytest (>=3.2.1,!=3.3.0)"] +typecheck = ["mypy"] + [[package]] name = "black" version = "23.12.1" @@ -340,13 +399,13 @@ files = [ [[package]] name = "botocore" -version = "1.34.27" +version = "1.34.34" description = "Low-level, data-driven core of boto 3." optional = true python-versions = ">= 3.8" files = [ - {file = "botocore-1.34.27-py3-none-any.whl", hash = "sha256:1c10f247136ad17b6ef1588c1e043e294dbaebdebe9ce84dc56713029f515c53"}, - {file = "botocore-1.34.27.tar.gz", hash = "sha256:a0e68ba264275b358b8c1cca604161f4d9465cf7847d73e929543a9f30ff22d1"}, + {file = "botocore-1.34.34-py3-none-any.whl", hash = "sha256:cd060b0d88ebb2b893f1411c1db7f2ba66cc18e52dcc57ad029564ef5fec437b"}, + {file = "botocore-1.34.34.tar.gz", hash = "sha256:54093dc97372bb7683f5c61a279aa8240408abf3b2cc494ae82a9a90c1b784b5"}, ] [package.dependencies] @@ -381,13 +440,13 @@ files = [ [[package]] name = "certifi" -version = "2023.11.17" +version = "2024.2.2" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2023.11.17-py3-none-any.whl", hash = "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474"}, - {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"}, + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, ] [[package]] @@ -752,13 +811,13 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] [[package]] name = "dataclasses-json" -version = "0.6.3" +version = "0.6.4" description = "Easily serialize dataclasses to and from JSON." optional = false python-versions = ">=3.7,<4.0" files = [ - {file = "dataclasses_json-0.6.3-py3-none-any.whl", hash = "sha256:4aeb343357997396f6bca1acae64e486c3a723d8f5c76301888abeccf0c45176"}, - {file = "dataclasses_json-0.6.3.tar.gz", hash = "sha256:35cb40aae824736fdf959801356641836365219cfe14caeb115c39136f775d2a"}, + {file = "dataclasses_json-0.6.4-py3-none-any.whl", hash = "sha256:f90578b8a3177f7552f4e1a6e535e84293cd5da421fcce0642d49c0d7bdf8df2"}, + {file = "dataclasses_json-0.6.4.tar.gz", hash = "sha256:73696ebf24936560cca79a2430cbc4f3dd23ac7bf46ed17f38e5e5e7657a6377"}, ] [package.dependencies] @@ -767,25 +826,27 @@ typing-inspect = ">=0.4.0,<1" [[package]] name = "datasets" -version = "2.13.0" +version = "2.16.1" description = "HuggingFace community-driven open-source library of datasets" optional = true -python-versions = ">=3.7.0" +python-versions = ">=3.8.0" files = [ - {file = "datasets-2.13.0-py3-none-any.whl", hash = "sha256:26671d474990ad8fd7388e8c67cde4d72f6c1f0e87af685fc09af5d9a5992274"}, - {file = "datasets-2.13.0.tar.gz", hash = "sha256:b8c3bcf9c3d0c74f101c7645e42231de9f45206a2e742df15799da9bfa625608"}, + {file = "datasets-2.16.1-py3-none-any.whl", hash = "sha256:fafa300c78ff92d521473a3d47d60c2d3e0d6046212cc03ceb6caf6550737257"}, + {file = "datasets-2.16.1.tar.gz", hash = "sha256:ad3215e9b1984d1de4fda2123bc7319ccbdf1e17d0c3d5590d13debff308a080"}, ] [package.dependencies] aiohttp = "*" -dill = ">=0.3.0,<0.3.7" -fsspec = {version = ">=2021.11.1", extras = ["http"]} -huggingface-hub = ">=0.11.0,<1.0.0" +dill = ">=0.3.0,<0.3.8" +filelock = "*" +fsspec = {version = ">=2023.1.0,<=2023.10.0", extras = ["http"]} +huggingface-hub = ">=0.19.4" multiprocess = "*" numpy = ">=1.17" packaging = "*" pandas = "*" pyarrow = ">=8.0.0" +pyarrow-hotfix = "*" pyyaml = ">=5.1" requests = ">=2.19.0" tqdm = ">=4.62.1" @@ -794,16 +855,16 @@ xxhash = "*" [package.extras] apache-beam = ["apache-beam (>=2.26.0,<2.44.0)"] audio = ["librosa", "soundfile (>=0.12.1)"] -benchmarks = ["numpy (==1.18.5)", "protobuf (==3.20.3)", "tensorflow (==2.3.0)", "torch (==1.7.1)", "transformers (==3.0.2)"] -dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "black (>=23.1,<24.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "pyyaml (>=5.3.1)", "rarfile (>=4.0)", "ruff (>=0.0.241)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"] -docs = ["s3fs"] -jax = ["jax (>=0.2.8,!=0.3.2,<=0.3.25)", "jaxlib (>=0.1.65,<=0.3.25)"] +benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] +dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.1.5)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] +docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"] +jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] -quality = ["black (>=23.1,<24.0)", "pyyaml (>=5.3.1)", "ruff (>=0.0.241)"] +quality = ["ruff (>=0.1.5)"] s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"] tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"] -tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"] +tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=6.2.1)"] @@ -820,13 +881,13 @@ files = [ [[package]] name = "dill" -version = "0.3.6" -description = "serialize all of python" +version = "0.3.7" +description = "serialize all of Python" optional = true python-versions = ">=3.7" files = [ - {file = "dill-0.3.6-py3-none-any.whl", hash = "sha256:a07ffd2351b8c678dfc4a856a3005f8067aea51d6ba6c700796a4d9e280f39f0"}, - {file = "dill-0.3.6.tar.gz", hash = "sha256:e5db55f3687856d8fbdab002ed78544e1c4559a130302693d839dfe8f93f2373"}, + {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"}, + {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"}, ] [package.extras] @@ -854,6 +915,24 @@ files = [ {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, ] +[[package]] +name = "ecdsa" +version = "0.18.0" +description = "ECDSA cryptographic signature library (pure python)" +optional = true +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "ecdsa-0.18.0-py2.py3-none-any.whl", hash = "sha256:80600258e7ed2f16b9aa1d7c295bd70194109ad5a30fdee0eaeefef1d4c559dd"}, + {file = "ecdsa-0.18.0.tar.gz", hash = "sha256:190348041559e21b22a1d65cee485282ca11a6f81d503fddb84d5017e9ed1e49"}, +] + +[package.dependencies] +six = ">=1.9.0" + +[package.extras] +gmpy = ["gmpy"] +gmpy2 = ["gmpy2"] + [[package]] name = "einops" version = "0.7.0" @@ -902,22 +981,22 @@ test = ["pytest (>=6)"] [[package]] name = "fastapi" -version = "0.109.0" +version = "0.109.2" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = true python-versions = ">=3.8" files = [ - {file = "fastapi-0.109.0-py3-none-any.whl", hash = "sha256:8c77515984cd8e8cfeb58364f8cc7a28f0692088475e2614f7bf03275eba9093"}, - {file = "fastapi-0.109.0.tar.gz", hash = "sha256:b978095b9ee01a5cf49b19f4bc1ac9b8ca83aa076e770ef8fd9af09a2b88d191"}, + {file = "fastapi-0.109.2-py3-none-any.whl", hash = "sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d"}, + {file = "fastapi-0.109.2.tar.gz", hash = "sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73"}, ] [package.dependencies] pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" -starlette = ">=0.35.0,<0.36.0" +starlette = ">=0.36.3,<0.37.0" typing-extensions = ">=4.8.0" [package.extras] -all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] [[package]] name = "ffmpy" @@ -1114,13 +1193,13 @@ files = [ [[package]] name = "fsspec" -version = "2023.12.2" +version = "2023.10.0" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2023.12.2-py3-none-any.whl", hash = "sha256:d800d87f72189a745fa3d6b033b9dc4a34ad069f60ca60b943a63599f5501960"}, - {file = "fsspec-2023.12.2.tar.gz", hash = "sha256:8548d39e8810b59c38014934f6b31e57f40c1b20f911f4cc2b85389c7e9bf0cb"}, + {file = "fsspec-2023.10.0-py3-none-any.whl", hash = "sha256:346a8f024efeb749d2a5fca7ba8854474b1ff9af7c3faaf636a4548781136529"}, + {file = "fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5"}, ] [package.dependencies] @@ -1344,13 +1423,13 @@ test = ["objgraph", "psutil"] [[package]] name = "griffe" -version = "0.39.1" +version = "0.40.0" description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API." optional = false python-versions = ">=3.8" files = [ - {file = "griffe-0.39.1-py3-none-any.whl", hash = "sha256:6ce4ecffcf0d2f96362c5974b3f7df812da8f8d4cfcc5ebc8202ef72656fc087"}, - {file = "griffe-0.39.1.tar.gz", hash = "sha256:ead8dfede6e6531cce6bf69090a4f3c6d36fdf923c43f8e85aa530552cef0c09"}, + {file = "griffe-0.40.0-py3-none-any.whl", hash = "sha256:db1da6d1d8e08cbb20f1a7dee8c09da940540c2d4c1bfa26a9091cf6fc36a9ec"}, + {file = "griffe-0.40.0.tar.gz", hash = "sha256:76c4439eaa2737af46ae003c331ab6ca79c5365b552f7b5aed263a3b4125735b"}, ] [package.dependencies] @@ -1623,13 +1702,13 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jmespath" -version = "0.10.0" +version = "1.0.1" description = "JSON Matching Expressions" optional = true -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +python-versions = ">=3.7" files = [ - {file = "jmespath-0.10.0-py2.py3-none-any.whl", hash = "sha256:cdf6525904cc597730141d61b36f2e4b8ecc257c420fa2f4549bac2c2d0cb72f"}, - {file = "jmespath-0.10.0.tar.gz", hash = "sha256:b85d0567b8666149a93172712e68920734333c0ce7e89b78b3e987f71e5ed4f9"}, + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, ] [[package]] @@ -1883,13 +1962,13 @@ extended-testing = ["jinja2 (>=3,<4)"] [[package]] name = "langsmith" -version = "0.0.84" +version = "0.0.86" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = ">=3.8.1,<4.0" files = [ - {file = "langsmith-0.0.84-py3-none-any.whl", hash = "sha256:9ae1ab777018e2174f68e8f53c88e7a7feb8dbf1c458b473644a3d5e22dc1eb7"}, - {file = "langsmith-0.0.84.tar.gz", hash = "sha256:dd163f89bca14c86759c651a72917c6d45f7dd18435d7bc65dc205a23dd9ec8d"}, + {file = "langsmith-0.0.86-py3-none-any.whl", hash = "sha256:7af15c36edb8c9fd9ae5c6d4fb940eb1da668b630a703d63c90c91e9be53aefb"}, + {file = "langsmith-0.0.86.tar.gz", hash = "sha256:c1572824664810c4425b17f2d1e9a59d53992e6898df22a37236c62d3c80f59e"}, ] [package.dependencies] @@ -1937,71 +2016,71 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] [[package]] name = "markupsafe" -version = "2.1.4" +version = "2.1.5" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.7" files = [ - {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:de8153a7aae3835484ac168a9a9bdaa0c5eee4e0bc595503c95d53b942879c84"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e888ff76ceb39601c59e219f281466c6d7e66bd375b4ec1ce83bcdc68306796b"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0b838c37ba596fcbfca71651a104a611543077156cb0a26fe0c475e1f152ee8"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac1ebf6983148b45b5fa48593950f90ed6d1d26300604f321c74a9ca1609f8e"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0fbad3d346df8f9d72622ac71b69565e621ada2ce6572f37c2eae8dacd60385d"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d5291d98cd3ad9a562883468c690a2a238c4a6388ab3bd155b0c75dd55ece858"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a7cc49ef48a3c7a0005a949f3c04f8baa5409d3f663a1b36f0eba9bfe2a0396e"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b83041cda633871572f0d3c41dddd5582ad7d22f65a72eacd8d3d6d00291df26"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-win32.whl", hash = "sha256:0c26f67b3fe27302d3a412b85ef696792c4a2386293c53ba683a89562f9399b0"}, - {file = "MarkupSafe-2.1.4-cp310-cp310-win_amd64.whl", hash = "sha256:a76055d5cb1c23485d7ddae533229039b850db711c554a12ea64a0fd8a0129e2"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9e9e3c4020aa2dc62d5dd6743a69e399ce3de58320522948af6140ac959ab863"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0042d6a9880b38e1dd9ff83146cc3c9c18a059b9360ceae207805567aacccc69"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d03fea4c4e9fd0ad75dc2e7e2b6757b80c152c032ea1d1de487461d8140efc"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ab3a886a237f6e9c9f4f7d272067e712cdb4efa774bef494dccad08f39d8ae6"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:abf5ebbec056817057bfafc0445916bb688a255a5146f900445d081db08cbabb"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e1a0d1924a5013d4f294087e00024ad25668234569289650929ab871231668e7"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:e7902211afd0af05fbadcc9a312e4cf10f27b779cf1323e78d52377ae4b72bea"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c669391319973e49a7c6230c218a1e3044710bc1ce4c8e6eb71f7e6d43a2c131"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-win32.whl", hash = "sha256:31f57d64c336b8ccb1966d156932f3daa4fee74176b0fdc48ef580be774aae74"}, - {file = "MarkupSafe-2.1.4-cp311-cp311-win_amd64.whl", hash = "sha256:54a7e1380dfece8847c71bf7e33da5d084e9b889c75eca19100ef98027bd9f56"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a76cd37d229fc385738bd1ce4cba2a121cf26b53864c1772694ad0ad348e509e"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:987d13fe1d23e12a66ca2073b8d2e2a75cec2ecb8eab43ff5624ba0ad42764bc"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5244324676254697fe5c181fc762284e2c5fceeb1c4e3e7f6aca2b6f107e60dc"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78bc995e004681246e85e28e068111a4c3f35f34e6c62da1471e844ee1446250"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4d176cfdfde84f732c4a53109b293d05883e952bbba68b857ae446fa3119b4f"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f9917691f410a2e0897d1ef99619fd3f7dd503647c8ff2475bf90c3cf222ad74"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f06e5a9e99b7df44640767842f414ed5d7bedaaa78cd817ce04bbd6fd86e2dd6"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:396549cea79e8ca4ba65525470d534e8a41070e6b3500ce2414921099cb73e8d"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-win32.whl", hash = "sha256:f6be2d708a9d0e9b0054856f07ac7070fbe1754be40ca8525d5adccdbda8f475"}, - {file = "MarkupSafe-2.1.4-cp312-cp312-win_amd64.whl", hash = "sha256:5045e892cfdaecc5b4c01822f353cf2c8feb88a6ec1c0adef2a2e705eef0f656"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7a07f40ef8f0fbc5ef1000d0c78771f4d5ca03b4953fc162749772916b298fc4"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d18b66fe626ac412d96c2ab536306c736c66cf2a31c243a45025156cc190dc8a"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:698e84142f3f884114ea8cf83e7a67ca8f4ace8454e78fe960646c6c91c63bfa"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49a3b78a5af63ec10d8604180380c13dcd870aba7928c1fe04e881d5c792dc4e"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:15866d7f2dc60cfdde12ebb4e75e41be862348b4728300c36cdf405e258415ec"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:6aa5e2e7fc9bc042ae82d8b79d795b9a62bd8f15ba1e7594e3db243f158b5565"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:54635102ba3cf5da26eb6f96c4b8c53af8a9c0d97b64bdcb592596a6255d8518"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-win32.whl", hash = "sha256:3583a3a3ab7958e354dc1d25be74aee6228938312ee875a22330c4dc2e41beb0"}, - {file = "MarkupSafe-2.1.4-cp37-cp37m-win_amd64.whl", hash = "sha256:d6e427c7378c7f1b2bef6a344c925b8b63623d3321c09a237b7cc0e77dd98ceb"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bf1196dcc239e608605b716e7b166eb5faf4bc192f8a44b81e85251e62584bd2"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4df98d4a9cd6a88d6a585852f56f2155c9cdb6aec78361a19f938810aa020954"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b835aba863195269ea358cecc21b400276747cc977492319fd7682b8cd2c253d"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23984d1bdae01bee794267424af55eef4dfc038dc5d1272860669b2aa025c9e3"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c98c33ffe20e9a489145d97070a435ea0679fddaabcafe19982fe9c971987d5"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9896fca4a8eb246defc8b2a7ac77ef7553b638e04fbf170bff78a40fa8a91474"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b0fe73bac2fed83839dbdbe6da84ae2a31c11cfc1c777a40dbd8ac8a6ed1560f"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c7556bafeaa0a50e2fe7dc86e0382dea349ebcad8f010d5a7dc6ba568eaaa789"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-win32.whl", hash = "sha256:fc1a75aa8f11b87910ffd98de62b29d6520b6d6e8a3de69a70ca34dea85d2a8a"}, - {file = "MarkupSafe-2.1.4-cp38-cp38-win_amd64.whl", hash = "sha256:3a66c36a3864df95e4f62f9167c734b3b1192cb0851b43d7cc08040c074c6279"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:765f036a3d00395a326df2835d8f86b637dbaf9832f90f5d196c3b8a7a5080cb"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:21e7af8091007bf4bebf4521184f4880a6acab8df0df52ef9e513d8e5db23411"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c31fe855c77cad679b302aabc42d724ed87c043b1432d457f4976add1c2c3e"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7653fa39578957bc42e5ebc15cf4361d9e0ee4b702d7d5ec96cdac860953c5b4"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47bb5f0142b8b64ed1399b6b60f700a580335c8e1c57f2f15587bd072012decc"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:fe8512ed897d5daf089e5bd010c3dc03bb1bdae00b35588c49b98268d4a01e00"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:36d7626a8cca4d34216875aee5a1d3d654bb3dac201c1c003d182283e3205949"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b6f14a9cd50c3cb100eb94b3273131c80d102e19bb20253ac7bd7336118a673a"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-win32.whl", hash = "sha256:c8f253a84dbd2c63c19590fa86a032ef3d8cc18923b8049d91bcdeeb2581fbf6"}, - {file = "MarkupSafe-2.1.4-cp39-cp39-win_amd64.whl", hash = "sha256:8b570a1537367b52396e53325769608f2a687ec9a4363647af1cded8928af959"}, - {file = "MarkupSafe-2.1.4.tar.gz", hash = "sha256:3aae9af4cac263007fd6309c64c6ab4506dd2b79382d9d19a1994f9240b8db4f"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"}, + {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"}, + {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"}, + {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"}, + {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"}, + {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"}, + {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"}, + {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"}, ] [[package]] @@ -2152,13 +2231,13 @@ mkdocs = ">=1.1" [[package]] name = "mkdocs-material" -version = "9.5.6" +version = "9.5.7" description = "Documentation that simply works" optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.5.6-py3-none-any.whl", hash = "sha256:e115b90fccf5cd7f5d15b0c2f8e6246b21041628b8f590630e7fca66ed7fcf6c"}, - {file = "mkdocs_material-9.5.6.tar.gz", hash = "sha256:5b24df36d8ac6cecd611241ce6f6423ccde3e1ad89f8360c3f76d5565fc2d82a"}, + {file = "mkdocs_material-9.5.7-py3-none-any.whl", hash = "sha256:0be8ce8bcfebb52bae9b00cf9b851df45b8a92d629afcfd7f2c09b2dfa155ea3"}, + {file = "mkdocs_material-9.5.7.tar.gz", hash = "sha256:16110292575d88a338d2961f3cb665cf12943ff8829e551a9b364f24019e46af"}, ] [package.dependencies] @@ -2192,13 +2271,13 @@ files = [ [[package]] name = "mkdocs-table-reader-plugin" -version = "2.0.3" +version = "2.1.0" description = "MkDocs plugin to directly insert tables from files into markdown." optional = false -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "mkdocs-table-reader-plugin-2.0.3.tar.gz", hash = "sha256:e14136a6103d43030215491ff0a6cdcf8f92a78c3bac32a509e4cf2b365ef529"}, - {file = "mkdocs_table_reader_plugin-2.0.3-py3-none-any.whl", hash = "sha256:7d1abb3838ac2ab7b1a5a5cc7f40a4e966a77defa7a7ae1f2137efd185b96752"}, + {file = "mkdocs-table-reader-plugin-2.1.0.tar.gz", hash = "sha256:cef5a1127e2c04955d5716e67ede66971a0a5ee3dd5386af6cba53c707c78b8b"}, + {file = "mkdocs_table_reader_plugin-2.1.0-py3-none-any.whl", hash = "sha256:b98041627b4982c5a5c6dac6f2e97767c53658771eed926057aa48c61a4e494f"}, ] [package.dependencies] @@ -2251,19 +2330,19 @@ mkdocstrings = ">=0.20" [[package]] name = "modelscope" -version = "1.9.5" +version = "1.11.1" description = "ModelScope: bring the notion of Model-as-a-Service to life." optional = true python-versions = "*" files = [ - {file = "modelscope-1.9.5-py3-none-any.whl", hash = "sha256:43630a55316f9806230b674aa8fe5f3ef90f731974392875dd0c5180da472c60"}, - {file = "modelscope-1.9.5.tar.gz", hash = "sha256:10837de92333a497f8f16e3aa39710dcbde8dc6d80aec41a4201960458cecf04"}, + {file = "modelscope-1.11.1-py3-none-any.whl", hash = "sha256:dbbfe71092e428e78f64fa6502836371aa9439879e2496f2ae74d6868d2028df"}, + {file = "modelscope-1.11.1.tar.gz", hash = "sha256:87bd055d924f6db94dd456e2422bfa69dfe02deb884cbf779b6e49256f9be2b0"}, ] [package.dependencies] addict = "*" attrs = "*" -datasets = ">=2.8.0,<=2.13.0" +datasets = ">=2.14.5" einops = "*" filelock = ">=3.3.0" gast = ">=0.2.2" @@ -2284,14 +2363,14 @@ urllib3 = ">=1.26" yapf = "*" [package.extras] -all = ["PyMCubes", "accelerate", "albumentations (>=1.0.3)", "av (>=9.2.0)", "biopython", "bmt-clipit (>=1.0)", "boto3", "chumpy", "clip (>=1.0)", "cloudpickle", "control-ldm", "ddpm-guided-diffusion", "decord (>=0.6.0)", "diffusers", "diffusers (>=0.19.0)", "easydict", "easyrobust", "edit-distance", "embeddings", "face-alignment (>=1.3.5)", "fairscale (>=0.4.1)", "fairseq", "fastai (>=1.0.51)", "ffmpeg (>=1.4)", "ffmpeg-python (>=0.2.0)", "filelock", "ftfy", "ftfy (>=6.0.3)", "fvcore", "healpy", "imageio (>=2.9.0)", "imageio-ffmpeg (>=0.4.2)", "imgaug (>=0.4.0)", "iopath", "ipdb", "jieba (>=0.42.1)", "kornia (>=0.5.0)", "lap", "librosa (==0.9.2)", "lmdb", "lpips", "matplotlib", "megatron-util", "ml-collections", "mmcls (>=0.21.0)", "mmdet (>=2.25.0,<=2.28.2)", "mmdet3d (==1.0.0a1)", "mmsegmentation (<=0.30.0)", "moviepy (>=1.0.3)", "nerfacc (==0.2.2)", "networkx", "nltk", "numba", "omegaconf", "onnx", "onnxruntime (>=1.10)", "onnxsim", "open-clip-torch (>=2.7.0)", "opencv-python", "paint-ldm", "pandas", "panopticapi", "plyfile (>=0.7.4)", "protobuf (>=3.19.0,<3.21.0)", "psutil", "pyclipper", "pycocoevalcap (>=1.2)", "pycocotools (>=2.0.4)", "pydot", "pythainlp", "pytorch-lightning", "pytorch-lightning (<=1.7.7)", "pyvi", "rapidfuzz", "regex", "rouge", "rouge-score (<=0.0.4)", "sacrebleu", "sacremoses (>=0.0.41)", "safetensors", "scikit-image (>=0.19.3,<0.20.0)", "scikit-learn", "scikit-learn (>=0.20.1)", "scipy", "sentencepiece", "seqeval", "shapely", "shotdetect-scenedetect-lgss (>=0.0.4)", "smplx", "soundfile", "spacy (>=2.3.5)", "stanza", "subword-nmt (>=0.3.8)", "taming-transformers-rom1504", "tensorboardX", "tensorflow-estimator (>=1.15.1)", "termcolor", "tf-slim", "thop", "timm", "timm (>=0.4.9)", "tokenizers", "torch-scatter", "torchmetrics (>=0.6.2)", "torchsummary (>=1.5.1)", "torchvision", "tqdm", "transformers (>=4.12.0)", "transformers (>=4.26.0)", "transformers (>=4.27.1)", "trimesh", "ujson", "unicodedata2", "utils", "videofeatures-clipit (>=1.0)", "yacs", "zhconv"] -audio = ["MinDAEC", "PyWavelets (>=1.0.0)", "SoundFile (>0.10)", "bitstring", "funasr (>=0.6.5)", "greenlet (>=1.1.2)", "hdbscan", "hyperpyyaml", "inflect", "jedi (>=0.18.1)", "kaldiio", "kantts", "kwsbp (>=0.0.6)", "librosa (==0.9.2)", "lxml", "matplotlib", "mir-eval (>=0.7)", "msgpack (>=1.0.4)", "parso (>=0.8.3)", "pexpect (>=4.8.0)", "pickleshare (>=0.7.5)", "prompt-toolkit (>=3.0.30)", "protobuf", "ptflops", "ptyprocess (>=0.7.0)", "py-sound-connect (>=0.1)", "pygments (>=2.12.0)", "pysptk (>=0.1.15,<0.1.19)", "pytorch-wavelets", "rotary-embedding-torch (>=0.1.5)", "scikit-learn", "scipy", "sox", "speechbrain (>=0.5.12)", "tensorboardX", "tensorboardx", "torchaudio", "tqdm", "traitlets (>=5.3.0)", "ttsfrd (>=0.1.2)", "umap-learn", "unidecode", "wcwidth (>=0.2.5)"] -audio-asr = ["funasr (>=0.6.5)"] +all = ["PyMCubes", "accelerate", "albumentations (>=1.0.3)", "av (>=9.2.0)", "biopython", "bmt-clipit (>=1.0)", "boto3", "chumpy", "clip (>=1.0)", "cloudpickle", "control-ldm", "ddpm-guided-diffusion", "decord (>=0.6.0)", "diffusers", "diffusers (>=0.25.0)", "easydict", "easyrobust", "edit-distance", "embeddings", "face-alignment (>=1.3.5)", "fairscale (>=0.4.1)", "fairseq", "fastai (>=1.0.51)", "ffmpeg (>=1.4)", "ffmpeg-python (>=0.2.0)", "filelock", "ftfy", "ftfy (>=6.0.3)", "fvcore", "imageio (>=2.9.0)", "imageio-ffmpeg (>=0.4.2)", "imgaug (>=0.4.0)", "iopath", "ipdb", "jieba (>=0.42.1)", "kornia (>=0.5.0)", "lap", "librosa (==0.10.1)", "lmdb", "lpips", "matplotlib", "megatron-util", "ml-collections", "mmcls (>=0.21.0)", "mmdet (>=2.25.0,<=2.28.2)", "mmdet3d (==1.0.0a1)", "mmsegmentation (<=0.30.0)", "moviepy (>=1.0.3)", "nerfacc (==0.2.2)", "networkx", "nltk", "numba", "omegaconf", "onnx", "onnxruntime (>=1.10)", "onnxsim", "open-clip-torch (>=2.7.0)", "opencv-python", "paint-ldm", "pandas", "panopticapi", "plyfile (>=0.7.4)", "protobuf (>=3.19.0,<3.21.0)", "psutil", "pyclipper", "pycocoevalcap (>=1.2)", "pycocotools (>=2.0.4)", "pydot", "pythainlp", "pytorch-lightning", "pytorch-lightning (<=1.7.7)", "pyvi", "rapidfuzz", "regex", "rouge", "rouge-score (<=0.0.4)", "sacrebleu", "sacremoses (>=0.0.41)", "safetensors", "scikit-image (>=0.19.3,<0.20.0)", "scikit-learn", "scikit-learn (>=0.20.1)", "scipy", "sentencepiece", "seqeval", "shapely", "shotdetect-scenedetect-lgss (>=0.0.4)", "smplx", "soundfile", "spacy (>=2.3.5)", "stanza", "subword-nmt (>=0.3.8)", "taming-transformers-rom1504", "tensorboardX", "tensorflow-estimator (>=1.15.1)", "termcolor", "tf-slim", "thop", "timm", "timm (>=0.4.9)", "tokenizers", "torch-scatter", "torchmetrics (>=0.6.2)", "torchsummary (>=1.5.1)", "torchvision", "tqdm", "transformers (>=4.12.0)", "transformers (>=4.26.0)", "transformers (>=4.27.1)", "trimesh", "ujson", "unicodedata2", "utils", "videofeatures-clipit (>=1.0)", "yacs", "zhconv"] +audio = ["MinDAEC", "PyWavelets (>=1.0.0)", "SoundFile (>0.10)", "bitstring", "funasr (>=1.0.0)", "funcodec (>=0.2.0)", "greenlet (>=1.1.2)", "hdbscan", "hyperpyyaml", "inflect", "jedi (>=0.18.1)", "kaldiio", "kantts", "kwsbp (>=0.0.6)", "librosa (==0.10.1)", "lxml", "matplotlib", "mir-eval (>=0.7)", "msgpack (>=1.0.4)", "parso (>=0.8.3)", "pexpect (>=4.8.0)", "pickleshare (>=0.7.5)", "prompt-toolkit (>=3.0.30)", "protobuf", "ptflops", "ptyprocess (>=0.7.0)", "py-sound-connect (>=0.1)", "pygments (>=2.12.0)", "pysptk (>=0.1.15,<0.1.19)", "pytorch-wavelets", "rotary-embedding-torch (>=0.1.5)", "scikit-learn", "scipy", "sox", "speechbrain (>=0.5.12)", "tensorboardX", "torchaudio", "tqdm", "traitlets (>=5.3.0)", "ttsfrd (>=0.1.2)", "umap-learn", "unidecode", "wcwidth (>=0.2.5)"] +audio-asr = ["funasr (>=1.0.0)"] audio-kws = ["SoundFile (>0.10)", "kaldiio", "kwsbp (>=0.0.6)", "matplotlib", "py-sound-connect (>=0.1)", "scipy", "tensorboardX"] -audio-signal = ["MinDAEC", "SoundFile (>0.10)", "hdbscan", "hyperpyyaml", "librosa (==0.9.2)", "mir-eval (>=0.7)", "rotary-embedding-torch (>=0.1.5)", "scipy", "speechbrain (>=0.5.12)", "torchaudio", "tqdm", "umap-learn"] -audio-tts = ["PyWavelets (>=1.0.0)", "bitstring", "greenlet (>=1.1.2)", "inflect", "jedi (>=0.18.1)", "kantts", "librosa (==0.9.2)", "lxml", "matplotlib", "msgpack (>=1.0.4)", "parso (>=0.8.3)", "pexpect (>=4.8.0)", "pickleshare (>=0.7.5)", "prompt-toolkit (>=3.0.30)", "protobuf", "ptflops", "ptyprocess (>=0.7.0)", "pygments (>=2.12.0)", "pysptk (>=0.1.15,<0.1.19)", "pytorch-wavelets", "scikit-learn", "sox", "tensorboardx", "tqdm", "traitlets (>=5.3.0)", "ttsfrd (>=0.1.2)", "unidecode", "wcwidth (>=0.2.5)"] -cv = ["PyMCubes", "accelerate", "albumentations (>=1.0.3)", "av (>=9.2.0)", "bmt-clipit (>=1.0)", "chumpy", "clip (>=1.0)", "control-ldm", "ddpm-guided-diffusion", "diffusers", "easydict", "easyrobust", "edit-distance", "face-alignment (>=1.3.5)", "fairscale (>=0.4.1)", "fastai (>=1.0.51)", "ffmpeg (>=1.4)", "ffmpeg-python (>=0.2.0)", "ftfy", "fvcore", "healpy", "imageio (>=2.9.0)", "imageio-ffmpeg (>=0.4.2)", "imgaug (>=0.4.0)", "kornia (>=0.5.0)", "lap", "lmdb", "lpips", "ml-collections", "mmcls (>=0.21.0)", "mmdet (>=2.25.0,<=2.28.2)", "mmdet3d (==1.0.0a1)", "mmsegmentation (<=0.30.0)", "moviepy (>=1.0.3)", "nerfacc (==0.2.2)", "networkx", "numba", "omegaconf", "onnx", "onnxruntime (>=1.10)", "onnxsim", "open-clip-torch (>=2.7.0)", "opencv-python", "paint-ldm", "pandas", "panopticapi", "plyfile (>=0.7.4)", "psutil", "pyclipper", "pytorch-lightning", "regex", "scikit-image (>=0.19.3,<0.20.0)", "scikit-learn (>=0.20.1)", "shapely", "shotdetect-scenedetect-lgss (>=0.0.4)", "smplx", "tensorflow-estimator (>=1.15.1)", "tf-slim", "thop", "timm (>=0.4.9)", "torch-scatter", "torchmetrics (>=0.6.2)", "torchsummary (>=1.5.1)", "torchvision", "tqdm", "transformers (>=4.26.0)", "trimesh", "ujson", "utils", "videofeatures-clipit (>=1.0)", "yacs"] -multi-modal = ["accelerate", "cloudpickle", "decord (>=0.6.0)", "diffusers (>=0.19.0)", "fairseq", "ftfy (>=6.0.3)", "librosa (==0.9.2)", "opencv-python", "pycocoevalcap (>=1.2)", "pycocotools (>=2.0.4)", "pydot", "pytorch-lightning (<=1.7.7)", "rapidfuzz", "rouge-score (<=0.0.4)", "sacrebleu", "safetensors", "soundfile", "taming-transformers-rom1504", "timm", "tokenizers", "torchvision", "transformers (>=4.27.1)", "unicodedata2", "zhconv"] +audio-signal = ["MinDAEC", "SoundFile (>0.10)", "hdbscan", "hyperpyyaml", "librosa (==0.10.1)", "mir-eval (>=0.7)", "rotary-embedding-torch (>=0.1.5)", "scipy", "speechbrain (>=0.5.12)", "torchaudio", "tqdm", "umap-learn"] +audio-tts = ["PyWavelets (>=1.0.0)", "bitstring", "greenlet (>=1.1.2)", "inflect", "jedi (>=0.18.1)", "kantts", "librosa (==0.10.1)", "lxml", "matplotlib", "msgpack (>=1.0.4)", "parso (>=0.8.3)", "pexpect (>=4.8.0)", "pickleshare (>=0.7.5)", "prompt-toolkit (>=3.0.30)", "protobuf", "ptflops", "ptyprocess (>=0.7.0)", "pygments (>=2.12.0)", "pysptk (>=0.1.15,<0.1.19)", "pytorch-wavelets", "scikit-learn", "sox", "tensorboardx", "tqdm", "traitlets (>=5.3.0)", "ttsfrd (>=0.1.2)", "unidecode", "wcwidth (>=0.2.5)"] +cv = ["PyMCubes", "accelerate", "albumentations (>=1.0.3)", "av (>=9.2.0)", "bmt-clipit (>=1.0)", "chumpy", "clip (>=1.0)", "control-ldm", "ddpm-guided-diffusion", "diffusers", "easydict", "easyrobust", "edit-distance", "face-alignment (>=1.3.5)", "fairscale (>=0.4.1)", "fastai (>=1.0.51)", "ffmpeg (>=1.4)", "ffmpeg-python (>=0.2.0)", "ftfy", "fvcore", "imageio (>=2.9.0)", "imageio-ffmpeg (>=0.4.2)", "imgaug (>=0.4.0)", "kornia (>=0.5.0)", "lap", "lmdb", "lpips", "ml-collections", "mmcls (>=0.21.0)", "mmdet (>=2.25.0,<=2.28.2)", "mmdet3d (==1.0.0a1)", "mmsegmentation (<=0.30.0)", "moviepy (>=1.0.3)", "nerfacc (==0.2.2)", "networkx", "numba", "omegaconf", "onnx", "onnxruntime (>=1.10)", "onnxsim", "open-clip-torch (>=2.7.0)", "opencv-python", "paint-ldm", "pandas", "panopticapi", "plyfile (>=0.7.4)", "psutil", "pyclipper", "pytorch-lightning", "regex", "scikit-image (>=0.19.3,<0.20.0)", "scikit-learn (>=0.20.1)", "shapely", "shotdetect-scenedetect-lgss (>=0.0.4)", "smplx", "tensorflow-estimator (>=1.15.1)", "tf-slim", "thop", "timm (>=0.4.9)", "torch-scatter", "torchmetrics (>=0.6.2)", "torchsummary (>=1.5.1)", "torchvision", "tqdm", "transformers (>=4.26.0)", "trimesh", "ujson", "utils", "videofeatures-clipit (>=1.0)", "yacs"] +multi-modal = ["accelerate", "cloudpickle", "decord (>=0.6.0)", "diffusers (>=0.25.0)", "fairseq", "ftfy (>=6.0.3)", "librosa (==0.10.1)", "opencv-python", "pycocoevalcap (>=1.2)", "pycocotools (>=2.0.4)", "pydot", "pytorch-lightning (<=1.7.7)", "rapidfuzz", "rouge-score (<=0.0.4)", "sacrebleu", "safetensors", "soundfile", "taming-transformers-rom1504", "timm", "tokenizers", "torchvision", "transformers (>=4.27.1)", "unicodedata2", "zhconv"] nlp = ["boto3", "embeddings", "filelock", "ftfy", "jieba (>=0.42.1)", "matplotlib", "megatron-util", "nltk", "pandas", "protobuf (>=3.19.0,<3.21.0)", "pythainlp", "pyvi", "regex", "rouge", "sacremoses (>=0.0.41)", "scikit-learn", "sentencepiece", "seqeval", "spacy (>=2.3.5)", "stanza", "subword-nmt (>=0.3.8)", "termcolor", "tokenizers", "transformers (>=4.12.0)", "zhconv"] science = ["biopython", "iopath", "ipdb", "lmdb", "ml-collections", "scipy", "tensorboardX", "tokenizers"] @@ -2314,112 +2393,130 @@ tests = ["pytest (>=4.6)"] [[package]] name = "multidict" -version = "6.0.4" +version = "6.0.5" description = "multidict implementation" optional = false python-versions = ">=3.7" files = [ - {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, - {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, - {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, - {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, - {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, - {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, - {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, - {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, - {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, - {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, - {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, - {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, - {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, - {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, - {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, - {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, - {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, - {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, - {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, - {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, - {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, - {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, - {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, - {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, - {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, - {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, - {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, - {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, - {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, - {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, - {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, - {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, - {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, - {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"}, + {file = "multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae"}, + {file = "multidict-6.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef"}, + {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc"}, + {file = "multidict-6.0.5-cp310-cp310-win32.whl", hash = "sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319"}, + {file = "multidict-6.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e"}, + {file = "multidict-6.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed"}, + {file = "multidict-6.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc"}, + {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e"}, + {file = "multidict-6.0.5-cp311-cp311-win32.whl", hash = "sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c"}, + {file = "multidict-6.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b"}, + {file = "multidict-6.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226"}, + {file = "multidict-6.0.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6"}, + {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda"}, + {file = "multidict-6.0.5-cp312-cp312-win32.whl", hash = "sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5"}, + {file = "multidict-6.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556"}, + {file = "multidict-6.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626"}, + {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3"}, + {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc"}, + {file = "multidict-6.0.5-cp37-cp37m-win32.whl", hash = "sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee"}, + {file = "multidict-6.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d"}, + {file = "multidict-6.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50"}, + {file = "multidict-6.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461"}, + {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44"}, + {file = "multidict-6.0.5-cp38-cp38-win32.whl", hash = "sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241"}, + {file = "multidict-6.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9"}, + {file = "multidict-6.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c"}, + {file = "multidict-6.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479"}, + {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c"}, + {file = "multidict-6.0.5-cp39-cp39-win32.whl", hash = "sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b"}, + {file = "multidict-6.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755"}, + {file = "multidict-6.0.5-py3-none-any.whl", hash = "sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7"}, + {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"}, ] [[package]] name = "multiprocess" -version = "0.70.14" -description = "better multiprocessing and multithreading in python" +version = "0.70.15" +description = "better multiprocessing and multithreading in Python" optional = true python-versions = ">=3.7" files = [ - {file = "multiprocess-0.70.14-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:560a27540daef4ce8b24ed3cc2496a3c670df66c96d02461a4da67473685adf3"}, - {file = "multiprocess-0.70.14-pp37-pypy37_pp73-manylinux_2_24_i686.whl", hash = "sha256:bfbbfa36f400b81d1978c940616bc77776424e5e34cb0c94974b178d727cfcd5"}, - {file = "multiprocess-0.70.14-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:89fed99553a04ec4f9067031f83a886d7fdec5952005551a896a4b6a59575bb9"}, - {file = "multiprocess-0.70.14-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:40a5e3685462079e5fdee7c6789e3ef270595e1755199f0d50685e72523e1d2a"}, - {file = "multiprocess-0.70.14-pp38-pypy38_pp73-manylinux_2_24_i686.whl", hash = "sha256:44936b2978d3f2648727b3eaeab6d7fa0bedf072dc5207bf35a96d5ee7c004cf"}, - {file = "multiprocess-0.70.14-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:e628503187b5d494bf29ffc52d3e1e57bb770ce7ce05d67c4bbdb3a0c7d3b05f"}, - {file = "multiprocess-0.70.14-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0d5da0fc84aacb0e4bd69c41b31edbf71b39fe2fb32a54eaedcaea241050855c"}, - {file = "multiprocess-0.70.14-pp39-pypy39_pp73-manylinux_2_24_i686.whl", hash = "sha256:6a7b03a5b98e911a7785b9116805bd782815c5e2bd6c91c6a320f26fd3e7b7ad"}, - {file = "multiprocess-0.70.14-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:cea5bdedd10aace3c660fedeac8b087136b4366d4ee49a30f1ebf7409bce00ae"}, - {file = "multiprocess-0.70.14-py310-none-any.whl", hash = "sha256:7dc1f2f6a1d34894c8a9a013fbc807971e336e7cc3f3ff233e61b9dc679b3b5c"}, - {file = "multiprocess-0.70.14-py37-none-any.whl", hash = "sha256:93a8208ca0926d05cdbb5b9250a604c401bed677579e96c14da3090beb798193"}, - {file = "multiprocess-0.70.14-py38-none-any.whl", hash = "sha256:6725bc79666bbd29a73ca148a0fb5f4ea22eed4a8f22fce58296492a02d18a7b"}, - {file = "multiprocess-0.70.14-py39-none-any.whl", hash = "sha256:63cee628b74a2c0631ef15da5534c8aedbc10c38910b9c8b18dcd327528d1ec7"}, - {file = "multiprocess-0.70.14.tar.gz", hash = "sha256:3eddafc12f2260d27ae03fe6069b12570ab4764ab59a75e81624fac453fbf46a"}, + {file = "multiprocess-0.70.15-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:aa36c7ed16f508091438687fe9baa393a7a8e206731d321e443745e743a0d4e5"}, + {file = "multiprocess-0.70.15-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:20e024018c46d0d1602024c613007ac948f9754659e3853b0aa705e83f6931d8"}, + {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_i686.whl", hash = "sha256:e576062981c91f0fe8a463c3d52506e598dfc51320a8dd8d78b987dfca91c5db"}, + {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:e73f497e6696a0f5433ada2b3d599ae733b87a6e8b008e387c62ac9127add177"}, + {file = "multiprocess-0.70.15-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:73db2e7b32dcc7f9b0f075c2ffa45c90b6729d3f1805f27e88534c8d321a1be5"}, + {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_i686.whl", hash = "sha256:4271647bd8a49c28ecd6eb56a7fdbd3c212c45529ad5303b40b3c65fc6928e5f"}, + {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:cf981fb998d6ec3208cb14f0cf2e9e80216e834f5d51fd09ebc937c32b960902"}, + {file = "multiprocess-0.70.15-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:18f9f2c7063346d1617bd1684fdcae8d33380ae96b99427260f562e1a1228b67"}, + {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_i686.whl", hash = "sha256:0eac53214d664c49a34695e5824872db4006b1a465edd7459a251809c3773370"}, + {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:1a51dd34096db47fb21fa2b839e615b051d51b97af9a67afbcdaa67186b44883"}, + {file = "multiprocess-0.70.15-py310-none-any.whl", hash = "sha256:7dd58e33235e83cf09d625e55cffd7b0f0eede7ee9223cdd666a87624f60c21a"}, + {file = "multiprocess-0.70.15-py311-none-any.whl", hash = "sha256:134f89053d82c9ed3b73edd3a2531eb791e602d4f4156fc92a79259590bd9670"}, + {file = "multiprocess-0.70.15-py37-none-any.whl", hash = "sha256:f7d4a1629bccb433114c3b4885f69eccc200994323c80f6feee73b0edc9199c5"}, + {file = "multiprocess-0.70.15-py38-none-any.whl", hash = "sha256:bee9afba476c91f9ebee7beeee0601face9eff67d822e893f9a893725fbd6316"}, + {file = "multiprocess-0.70.15-py39-none-any.whl", hash = "sha256:3e0953f5d52b4c76f1c973eaf8214554d146f2be5decb48e928e55c7a2d19338"}, + {file = "multiprocess-0.70.15.tar.gz", hash = "sha256:f20eed3036c0ef477b07a4177cf7c1ba520d9a2677870a4f47fe026f0cd6787e"}, ] [package.dependencies] -dill = ">=0.3.6" +dill = ">=0.3.7" [[package]] name = "mypy-extensions" @@ -2675,12 +2772,12 @@ nvidia-nvjitlink-cu12 = "*" [[package]] name = "nvidia-nccl-cu12" -version = "2.18.1" +version = "2.19.3" description = "NVIDIA Collective Communication Library (NCCL) Runtime" optional = true python-versions = ">=3" files = [ - {file = "nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:1a6c4acefcbebfa6de320f412bf7866de856e786e0462326ba1bac40de0b5e71"}, + {file = "nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", hash = "sha256:a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d"}, ] [[package]] @@ -2707,13 +2804,13 @@ files = [ [[package]] name = "openai" -version = "1.10.0" +version = "1.11.1" description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.10.0-py3-none-any.whl", hash = "sha256:aa69e97d0223ace9835fbf9c997abe9ee95318f684fd2de6d02c870700c71ebc"}, - {file = "openai-1.10.0.tar.gz", hash = "sha256:208886cb501b930dc63f48d51db9c15e5380380f80516d07332adad67c9f1053"}, + {file = "openai-1.11.1-py3-none-any.whl", hash = "sha256:e0f388ce499f53f58079d0c1f571f356f2b168b84d0d24a412506b6abc714980"}, + {file = "openai-1.11.1.tar.gz", hash = "sha256:f66b8fe431af43e09594147ef3cdcb79758285de72ebafd52be9700a2af41e99"}, ] [package.dependencies] @@ -2730,75 +2827,75 @@ datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] [[package]] name = "orjson" -version = "3.9.12" +version = "3.9.13" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = true python-versions = ">=3.8" files = [ - {file = "orjson-3.9.12-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:6b4e2bed7d00753c438e83b613923afdd067564ff7ed696bfe3a7b073a236e07"}, - {file = "orjson-3.9.12-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd1b8ec63f0bf54a50b498eedeccdca23bd7b658f81c524d18e410c203189365"}, - {file = "orjson-3.9.12-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ab8add018a53665042a5ae68200f1ad14c7953fa12110d12d41166f111724656"}, - {file = "orjson-3.9.12-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12756a108875526b76e505afe6d6ba34960ac6b8c5ec2f35faf73ef161e97e07"}, - {file = "orjson-3.9.12-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:890e7519c0c70296253660455f77e3a194554a3c45e42aa193cdebc76a02d82b"}, - {file = "orjson-3.9.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d664880d7f016efbae97c725b243b33c2cbb4851ddc77f683fd1eec4a7894146"}, - {file = "orjson-3.9.12-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:cfdaede0fa5b500314ec7b1249c7e30e871504a57004acd116be6acdda3b8ab3"}, - {file = "orjson-3.9.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6492ff5953011e1ba9ed1bf086835fd574bd0a3cbe252db8e15ed72a30479081"}, - {file = "orjson-3.9.12-cp310-none-win32.whl", hash = "sha256:29bf08e2eadb2c480fdc2e2daae58f2f013dff5d3b506edd1e02963b9ce9f8a9"}, - {file = "orjson-3.9.12-cp310-none-win_amd64.whl", hash = "sha256:0fc156fba60d6b50743337ba09f052d8afc8b64595112996d22f5fce01ab57da"}, - {file = "orjson-3.9.12-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:2849f88a0a12b8d94579b67486cbd8f3a49e36a4cb3d3f0ab352c596078c730c"}, - {file = "orjson-3.9.12-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3186b18754befa660b31c649a108a915493ea69b4fc33f624ed854ad3563ac65"}, - {file = "orjson-3.9.12-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbbf313c9fb9d4f6cf9c22ced4b6682230457741daeb3d7060c5d06c2e73884a"}, - {file = "orjson-3.9.12-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99e8cd005b3926c3db9b63d264bd05e1bf4451787cc79a048f27f5190a9a0311"}, - {file = "orjson-3.9.12-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59feb148392d9155f3bfed0a2a3209268e000c2c3c834fb8fe1a6af9392efcbf"}, - {file = "orjson-3.9.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4ae815a172a1f073b05b9e04273e3b23e608a0858c4e76f606d2d75fcabde0c"}, - {file = "orjson-3.9.12-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed398f9a9d5a1bf55b6e362ffc80ac846af2122d14a8243a1e6510a4eabcb71e"}, - {file = "orjson-3.9.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d3cfb76600c5a1e6be91326b8f3b83035a370e727854a96d801c1ea08b708073"}, - {file = "orjson-3.9.12-cp311-none-win32.whl", hash = "sha256:a2b6f5252c92bcab3b742ddb3ac195c0fa74bed4319acd74f5d54d79ef4715dc"}, - {file = "orjson-3.9.12-cp311-none-win_amd64.whl", hash = "sha256:c95488e4aa1d078ff5776b58f66bd29d628fa59adcb2047f4efd3ecb2bd41a71"}, - {file = "orjson-3.9.12-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:d6ce2062c4af43b92b0221ed4f445632c6bf4213f8a7da5396a122931377acd9"}, - {file = "orjson-3.9.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:950951799967558c214cd6cceb7ceceed6f81d2c3c4135ee4a2c9c69f58aa225"}, - {file = "orjson-3.9.12-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2dfaf71499d6fd4153f5c86eebb68e3ec1bf95851b030a4b55c7637a37bbdee4"}, - {file = "orjson-3.9.12-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:659a8d7279e46c97661839035a1a218b61957316bf0202674e944ac5cfe7ed83"}, - {file = "orjson-3.9.12-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af17fa87bccad0b7f6fd8ac8f9cbc9ee656b4552783b10b97a071337616db3e4"}, - {file = "orjson-3.9.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd52dec9eddf4c8c74392f3fd52fa137b5f2e2bed1d9ae958d879de5f7d7cded"}, - {file = "orjson-3.9.12-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:640e2b5d8e36b970202cfd0799d11a9a4ab46cf9212332cd642101ec952df7c8"}, - {file = "orjson-3.9.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:daa438bd8024e03bcea2c5a92cd719a663a58e223fba967296b6ab9992259dbf"}, - {file = "orjson-3.9.12-cp312-none-win_amd64.whl", hash = "sha256:1bb8f657c39ecdb924d02e809f992c9aafeb1ad70127d53fb573a6a6ab59d549"}, - {file = "orjson-3.9.12-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:f4098c7674901402c86ba6045a551a2ee345f9f7ed54eeffc7d86d155c8427e5"}, - {file = "orjson-3.9.12-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5586a533998267458fad3a457d6f3cdbddbcce696c916599fa8e2a10a89b24d3"}, - {file = "orjson-3.9.12-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:54071b7398cd3f90e4bb61df46705ee96cb5e33e53fc0b2f47dbd9b000e238e1"}, - {file = "orjson-3.9.12-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:67426651faa671b40443ea6f03065f9c8e22272b62fa23238b3efdacd301df31"}, - {file = "orjson-3.9.12-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4a0cd56e8ee56b203abae7d482ac0d233dbfb436bb2e2d5cbcb539fe1200a312"}, - {file = "orjson-3.9.12-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a84a0c3d4841a42e2571b1c1ead20a83e2792644c5827a606c50fc8af7ca4bee"}, - {file = "orjson-3.9.12-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:09d60450cda3fa6c8ed17770c3a88473a16460cd0ff2ba74ef0df663b6fd3bb8"}, - {file = "orjson-3.9.12-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bc82a4db9934a78ade211cf2e07161e4f068a461c1796465d10069cb50b32a80"}, - {file = "orjson-3.9.12-cp38-none-win32.whl", hash = "sha256:61563d5d3b0019804d782137a4f32c72dc44c84e7d078b89d2d2a1adbaa47b52"}, - {file = "orjson-3.9.12-cp38-none-win_amd64.whl", hash = "sha256:410f24309fbbaa2fab776e3212a81b96a1ec6037259359a32ea79fbccfcf76aa"}, - {file = "orjson-3.9.12-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:e773f251258dd82795fd5daeac081d00b97bacf1548e44e71245543374874bcf"}, - {file = "orjson-3.9.12-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b159baecfda51c840a619948c25817d37733a4d9877fea96590ef8606468b362"}, - {file = "orjson-3.9.12-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:975e72e81a249174840d5a8df977d067b0183ef1560a32998be340f7e195c730"}, - {file = "orjson-3.9.12-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:06e42e899dde61eb1851a9fad7f1a21b8e4be063438399b63c07839b57668f6c"}, - {file = "orjson-3.9.12-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c157e999e5694475a5515942aebeed6e43f7a1ed52267c1c93dcfde7d78d421"}, - {file = "orjson-3.9.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dde1bc7c035f2d03aa49dc8642d9c6c9b1a81f2470e02055e76ed8853cfae0c3"}, - {file = "orjson-3.9.12-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b0e9d73cdbdad76a53a48f563447e0e1ce34bcecef4614eb4b146383e6e7d8c9"}, - {file = "orjson-3.9.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:96e44b21fe407b8ed48afbb3721f3c8c8ce17e345fbe232bd4651ace7317782d"}, - {file = "orjson-3.9.12-cp39-none-win32.whl", hash = "sha256:cbd0f3555205bf2a60f8812133f2452d498dbefa14423ba90fe89f32276f7abf"}, - {file = "orjson-3.9.12-cp39-none-win_amd64.whl", hash = "sha256:03ea7ee7e992532c2f4a06edd7ee1553f0644790553a118e003e3c405add41fa"}, - {file = "orjson-3.9.12.tar.gz", hash = "sha256:da908d23a3b3243632b523344403b128722a5f45e278a8343c2bb67538dff0e4"}, + {file = "orjson-3.9.13-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:fa6b67f8bef277c2a4aadd548d58796854e7d760964126c3209b19bccc6a74f1"}, + {file = "orjson-3.9.13-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b812417199eeb169c25f67815cfb66fd8de7ff098bf57d065e8c1943a7ba5c8f"}, + {file = "orjson-3.9.13-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7ccd5bd222e5041069ad9d9868ab59e6dbc53ecde8d8c82b919954fbba43b46b"}, + {file = "orjson-3.9.13-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eaaf80957c38e9d3f796f355a80fad945e72cd745e6b64c210e635b7043b673e"}, + {file = "orjson-3.9.13-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:60da7316131185d0110a1848e9ad15311e6c8938ee0b5be8cbd7261e1d80ee8f"}, + {file = "orjson-3.9.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b98cd948372f0eb219bc309dee4633db1278687161e3280d9e693b6076951d2"}, + {file = "orjson-3.9.13-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3869d65561f10071d3e7f35ae58fd377056f67d7aaed5222f318390c3ad30339"}, + {file = "orjson-3.9.13-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:43fd6036b16bb6742d03dae62f7bdf8214d06dea47e4353cde7e2bd1358d186f"}, + {file = "orjson-3.9.13-cp310-none-win32.whl", hash = "sha256:0d3ba9d88e20765335260d7b25547d7c571eee2b698200f97afa7d8c7cd668fc"}, + {file = "orjson-3.9.13-cp310-none-win_amd64.whl", hash = "sha256:6e47153db080f5e87e8ba638f1a8b18995eede6b0abb93964d58cf11bcea362f"}, + {file = "orjson-3.9.13-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:4584e8eb727bc431baaf1bf97e35a1d8a0109c924ec847395673dfd5f4ef6d6f"}, + {file = "orjson-3.9.13-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f37f0cdd026ef777a4336e599d8194c8357fc14760c2a5ddcfdf1965d45504b"}, + {file = "orjson-3.9.13-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d714595d81efab11b42bccd119977d94b25d12d3a806851ff6bfd286a4bce960"}, + {file = "orjson-3.9.13-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9171e8e1a1f221953e38e84ae0abffe8759002fd8968106ee379febbb5358b33"}, + {file = "orjson-3.9.13-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ab9dbdec3f13f3ea6f937564ce21651844cfbf2725099f2f490426acf683c23"}, + {file = "orjson-3.9.13-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:811ac076855e33e931549340288e0761873baf29276ad00f221709933c644330"}, + {file = "orjson-3.9.13-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:860d0f5b42d0c0afd73fa4177709f6e1b966ba691fcd72175affa902052a81d6"}, + {file = "orjson-3.9.13-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:838b898e8c1f26eb6b8d81b180981273f6f5110c76c22c384979aca854194f1b"}, + {file = "orjson-3.9.13-cp311-none-win32.whl", hash = "sha256:d3222db9df629ef3c3673124f2e05fb72bc4a320c117e953fec0d69dde82e36d"}, + {file = "orjson-3.9.13-cp311-none-win_amd64.whl", hash = "sha256:978117122ca4cc59b28af5322253017f6c5fc03dbdda78c7f4b94ae984c8dd43"}, + {file = "orjson-3.9.13-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:031df1026c7ea8303332d78711f180231e3ae8b564271fb748a03926587c5546"}, + {file = "orjson-3.9.13-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fd9a2101d04e85086ea6198786a3f016e45475f800712e6833e14bf9ce2832f"}, + {file = "orjson-3.9.13-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:446d9ad04204e79229ae19502daeea56479e55cbc32634655d886f5a39e91b44"}, + {file = "orjson-3.9.13-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b57c0954a9fdd2b05b9cec0f5a12a0bdce5bf021a5b3b09323041613972481ab"}, + {file = "orjson-3.9.13-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:266e55c83f81248f63cc93d11c5e3a53df49a5d2598fa9e9db5f99837a802d5d"}, + {file = "orjson-3.9.13-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31372ba3a9fe8ad118e7d22fba46bbc18e89039e3bfa89db7bc8c18ee722dca8"}, + {file = "orjson-3.9.13-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e3b0c4da61f39899561e08e571f54472a09fa71717d9797928af558175ae5243"}, + {file = "orjson-3.9.13-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2cc03a35bfc71c8ebf96ce49b82c2a7be6af4b3cd3ac34166fdb42ac510bbfff"}, + {file = "orjson-3.9.13-cp312-none-win_amd64.whl", hash = "sha256:49b7e3fe861cb246361825d1a238f2584ed8ea21e714bf6bb17cebb86772e61c"}, + {file = "orjson-3.9.13-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:62e9a99879c4d5a04926ac2518a992134bfa00d546ea5a4cae4b9be454d35a22"}, + {file = "orjson-3.9.13-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d92a3e835a5100f1d5b566fff79217eab92223ca31900dba733902a182a35ab0"}, + {file = "orjson-3.9.13-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:23f21faf072ed3b60b5954686f98157e073f6a8068eaa58dbde83e87212eda84"}, + {file = "orjson-3.9.13-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:828c502bb261588f7de897e06cb23c4b122997cb039d2014cb78e7dabe92ef0c"}, + {file = "orjson-3.9.13-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16946d095212a3dec552572c5d9bca7afa40f3116ad49695a397be07d529f1fa"}, + {file = "orjson-3.9.13-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3deadd8dc0e9ff844b5b656fa30a48dbee1c3b332d8278302dd9637f6b09f627"}, + {file = "orjson-3.9.13-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9b1b5adc5adf596c59dca57156b71ad301d73956f5bab4039b0e34dbf50b9fa0"}, + {file = "orjson-3.9.13-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:ddc089315d030c54f0f03fb38286e2667c05009a78d659f108a8efcfbdf2e585"}, + {file = "orjson-3.9.13-cp38-none-win32.whl", hash = "sha256:ae77275a28667d9c82d4522b681504642055efa0368d73108511647c6499b31c"}, + {file = "orjson-3.9.13-cp38-none-win_amd64.whl", hash = "sha256:730385fdb99a21fce9bb84bb7fcbda72c88626facd74956bda712834b480729d"}, + {file = "orjson-3.9.13-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:7e8e4a571d958910272af8d53a9cbe6599f9f5fd496a1bc51211183bb2072cbd"}, + {file = "orjson-3.9.13-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfad553a36548262e7da0f3a7464270e13900b898800fb571a5d4b298c3f8356"}, + {file = "orjson-3.9.13-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0d691c44604941945b00e0a13b19a7d9c1a19511abadf0080f373e98fdeb6b31"}, + {file = "orjson-3.9.13-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8c83718346de08d68b3cb1105c5d91e5fc39885d8610fdda16613d4e3941459"}, + {file = "orjson-3.9.13-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:63ef57a53bfc2091a7cd50a640d9ae866bd7d92a5225a1bab6baa60ef62583f2"}, + {file = "orjson-3.9.13-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9156b96afa38db71344522f5517077eaedf62fcd2c9148392ff93d801128809c"}, + {file = "orjson-3.9.13-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31fb66b41fb2c4c817d9610f0bc7d31345728d7b5295ac78b63603407432a2b2"}, + {file = "orjson-3.9.13-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:8a730bf07feacb0863974e67b206b7c503a62199de1cece2eb0d4c233ec29c11"}, + {file = "orjson-3.9.13-cp39-none-win32.whl", hash = "sha256:5ef58869f3399acbbe013518d8b374ee9558659eef14bca0984f67cb1fbd3c37"}, + {file = "orjson-3.9.13-cp39-none-win_amd64.whl", hash = "sha256:9bcf56efdb83244cde070e82a69c0f03c47c235f0a5cb6c81d9da23af7fbaae4"}, + {file = "orjson-3.9.13.tar.gz", hash = "sha256:fc6bc65b0cf524ee042e0bc2912b9206ef242edfba7426cf95763e4af01f527a"}, ] [[package]] name = "oss2" -version = "2.18.4" +version = "2.13.1" description = "Aliyun OSS (Object Storage Service) SDK" optional = true python-versions = "*" files = [ - {file = "oss2-2.18.4.tar.gz", hash = "sha256:be1e7a871a8cc267726367333017d78333ee8fae88c727ad61396f59c1c0e4d0"}, + {file = "oss2-2.13.1.tar.gz", hash = "sha256:8548ea7d43326f6fd679bc8b79b3a2dfbfe9c6a60ed57e2410818fec57023dda"}, ] [package.dependencies] -aliyun-python-sdk-core = ">=2.13.12" +aliyun-python-sdk-core = ">=2.6.2" aliyun-python-sdk-kms = ">=2.4.1" crcmod = ">=1.7" pycryptodome = ">=3.4.7" @@ -2897,6 +2994,26 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "passlib" +version = "1.7.4" +description = "comprehensive password hashing framework supporting over 30 schemes" +optional = true +python-versions = "*" +files = [ + {file = "passlib-1.7.4-py2.py3-none-any.whl", hash = "sha256:aa6bca462b8d8bda89c70b382f0c298a20b5560af6cbfa2dce410c0a2fb669f1"}, + {file = "passlib-1.7.4.tar.gz", hash = "sha256:defd50f72b65c5402ab2c573830a6978e5f202ad0d984793c8dde2c4152ebe04"}, +] + +[package.dependencies] +bcrypt = {version = ">=3.1.0", optional = true, markers = "extra == \"bcrypt\""} + +[package.extras] +argon2 = ["argon2-cffi (>=18.2.0)"] +bcrypt = ["bcrypt (>=3.1.0)"] +build-docs = ["cloud-sptheme (>=1.10.1)", "sphinx (>=1.6)", "sphinxcontrib-fulltoc (>=1.2.0)"] +totp = ["cryptography"] + [[package]] name = "pathspec" version = "0.12.1" @@ -2995,18 +3112,18 @@ xmp = ["defusedxml"] [[package]] name = "platformdirs" -version = "4.1.0" +version = "4.2.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." optional = false python-versions = ">=3.8" files = [ - {file = "platformdirs-4.1.0-py3-none-any.whl", hash = "sha256:11c8f37bcca40db96d8144522d925583bdb7a31f7b0e37e3ed4318400a8e2380"}, - {file = "platformdirs-4.1.0.tar.gz", hash = "sha256:906d548203468492d432bcb294d4bc2fff751bf84971fbb2c10918cc206ee420"}, + {file = "platformdirs-4.2.0-py3-none-any.whl", hash = "sha256:0614df2a2f37e1a662acbd8e2b25b92ccf8632929bc6d43467e17fe89c75e068"}, + {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, ] [package.extras] -docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.1)", "sphinx-autodoc-typehints (>=1.24)"] -test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)"] +docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] [[package]] name = "pluggy" @@ -3148,6 +3265,17 @@ files = [ [package.dependencies] numpy = ">=1.16.6,<2" +[[package]] +name = "pyarrow-hotfix" +version = "0.6" +description = "" +optional = true +python-versions = ">=3.5" +files = [ + {file = "pyarrow_hotfix-0.6-py3-none-any.whl", hash = "sha256:dcc9ae2d220dff0083be6a9aa8e0cdee5182ad358d4931fce825c545e5c89178"}, + {file = "pyarrow_hotfix-0.6.tar.gz", hash = "sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945"}, +] + [[package]] name = "pyasn1" version = "0.5.1" @@ -3368,42 +3496,42 @@ ujson = ">=2.0.0" [[package]] name = "pymupdf" -version = "1.23.20" +version = "1.23.21" description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents." optional = false python-versions = ">=3.8" files = [ - {file = "PyMuPDF-1.23.20-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:4294f56061000686577c4c5c0c1b3ac9d07d3a9bedcd6ca3e79d55303278dec4"}, - {file = "PyMuPDF-1.23.20-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:f63e3498f1c323026dc90c733f77e5068ce91a8eb6277e508d7ef420e8ed22c0"}, - {file = "PyMuPDF-1.23.20-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:70e31488e0d74372fa287387d053b9ecb643783713486383e358aeba8ff38a56"}, - {file = "PyMuPDF-1.23.20-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:c7aecb501a402000b9e9d1aec441fe5c5a554f48e5d3fa72489603777fe97cb6"}, - {file = "PyMuPDF-1.23.20-cp310-none-win32.whl", hash = "sha256:e98976f2ab9617c2c2650d6b77ddc9136b3ed75df053518193650a23fcb2c11e"}, - {file = "PyMuPDF-1.23.20-cp310-none-win_amd64.whl", hash = "sha256:c403918110bfc7b448f15b5a4655b39b9f5e9d5baeb54e00778b90412dcfdcdb"}, - {file = "PyMuPDF-1.23.20-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:f46c9dd7b33f8a759f7c639757c266fcf89bf3541aa815bada1289593031b018"}, - {file = "PyMuPDF-1.23.20-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:0c2269e955cf40d5e019f17b22584627a3cd5f93d1bd4581d5dc793c9a7333be"}, - {file = "PyMuPDF-1.23.20-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:87cf3dff472a66d797eee7a8572244079f350de2d61cec79017ebe37de89544f"}, - {file = "PyMuPDF-1.23.20-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:47a51aa4128cbf5fd8d871d5f24132945d1b1672a196d3d3eaf7a843fde5bbb3"}, - {file = "PyMuPDF-1.23.20-cp311-none-win32.whl", hash = "sha256:e81a53cee68293dba0d03c94e5ce14f27b58482d0609665f855922d7ad4beca4"}, - {file = "PyMuPDF-1.23.20-cp311-none-win_amd64.whl", hash = "sha256:be882eb39246985dbec4ccc51a51a16b323cc5fbea8a41f3f9cf8dd2333a4246"}, - {file = "PyMuPDF-1.23.20-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:874c5b807c0706d5324a04efd49ce7fe6d141b240ee98343879ab65a487a7315"}, - {file = "PyMuPDF-1.23.20-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:010e233e1f8e7285256288f484234dbb06c38a6508c00bcd2f58177cb178fa70"}, - {file = "PyMuPDF-1.23.20-cp312-none-manylinux2014_aarch64.whl", hash = "sha256:10f62420681d0976d65c925643b89c7076be235be63839a83b9931f1c0bc3ade"}, - {file = "PyMuPDF-1.23.20-cp312-none-manylinux2014_x86_64.whl", hash = "sha256:47880c386ec2a72a2a3af768678ba02d837943d4385646147a7b4088af7ac03b"}, - {file = "PyMuPDF-1.23.20-cp312-none-win32.whl", hash = "sha256:6006b7f35cd7f330fba7dd5b9ffe7943aba5a24e7553c1299c541613c46169b6"}, - {file = "PyMuPDF-1.23.20-cp312-none-win_amd64.whl", hash = "sha256:f73cfeb316cbcfd01c31faf5ae8c00eacdf35cc23f97afb3a851694fd3acdcd5"}, - {file = "PyMuPDF-1.23.20-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:d56ca3864385fd333cec1caf678ab520c127435af1ce7346bfb07ad261c4f89b"}, - {file = "PyMuPDF-1.23.20-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:4262f8fa5af40b2a750084d311b2626969e240b2ab0eda038f4e25e07a3c65cd"}, - {file = "PyMuPDF-1.23.20-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:dcb4f29c373127023c20401ac6ec6f1e1b304dc0cc691a83122ed711511a467e"}, - {file = "PyMuPDF-1.23.20-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:048316ba76e4a4aba9a6baf07b8abc11b99265b2a71fd56c643577ffb03d6371"}, - {file = "PyMuPDF-1.23.20-cp38-none-win32.whl", hash = "sha256:550ecc5a5530e113d53d656a6ae30eb09bc25cc24bc2c4e7d1d83fec9ac2ac40"}, - {file = "PyMuPDF-1.23.20-cp38-none-win_amd64.whl", hash = "sha256:9870a25971807ad7f390692b44db6b1a2fe6afae099bd8a94a04115276dbb016"}, - {file = "PyMuPDF-1.23.20-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:a5975c44dfab7d4362e2cb5be148ee34086c6cd4cf941ac3c937c3fe684c5208"}, - {file = "PyMuPDF-1.23.20-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:8c215263e30f2281bfae357bfd2ece96ec47033c0db036e789f21eaf2ae44db5"}, - {file = "PyMuPDF-1.23.20-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:c9ab179d3d9b2a80181e6ece0d0c6090b013714efd2b7b375ef9f450fcfabc02"}, - {file = "PyMuPDF-1.23.20-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:4a913036aedb56e46285bd3b1cb34501e8077810bfa1e02dd99e30f044783641"}, - {file = "PyMuPDF-1.23.20-cp39-none-win32.whl", hash = "sha256:c54d161ada2444ce7eaa92485c06b0b1c8757258d86ede28a2673c7795422dca"}, - {file = "PyMuPDF-1.23.20-cp39-none-win_amd64.whl", hash = "sha256:397d7089517a5b0d4d5ce6bfbacd763642d65948b5cc9f6ee479e15abc263901"}, - {file = "PyMuPDF-1.23.20.tar.gz", hash = "sha256:30583360f58536d171954723d07c965ae4c489f4485f966cc921f23afbcfbf69"}, + {file = "PyMuPDF-1.23.21-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:92c24269dabc7f935ed6f27d8111c1f302cf17e2eb8659b12106dd7f06ccc8d3"}, + {file = "PyMuPDF-1.23.21-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:4a20550ce63120d98150a62eba0ed78536ab3ed46d30772805d9f39c8ad68df7"}, + {file = "PyMuPDF-1.23.21-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:ad836ab47617fd998e7637df5a702bec01e5e7617f0b79e1fe09efbe2bd83b6d"}, + {file = "PyMuPDF-1.23.21-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:13f86a2e95a36c78a21ad2642d603cc20e592dda34d75da035af6cf544527aca"}, + {file = "PyMuPDF-1.23.21-cp310-none-win32.whl", hash = "sha256:623ad46cef6d52e43de79acf25bfc0e549ed90ab37d7e34563feed0b8a5bbc7e"}, + {file = "PyMuPDF-1.23.21-cp310-none-win_amd64.whl", hash = "sha256:8edc13a96428639a2836b45c7670d114c09247d35e131191f373ef895467d864"}, + {file = "PyMuPDF-1.23.21-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:640b0f3a740f173ee725a8f7d6af3c0bdff268d9514618cf049c9b4ff8046d7d"}, + {file = "PyMuPDF-1.23.21-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:317a7d21aad5b853a2ca70bde2ab7438f845ca1f3a95236761b9cb40b2f7285f"}, + {file = "PyMuPDF-1.23.21-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:0c21b5cb7ea7603f99c4dded8514ee73c5c2711b7f43b5606fd0181f873e98fd"}, + {file = "PyMuPDF-1.23.21-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:be10b620d467503b743e244e81f573c84155f81b1ced54d6ce239a339a8af576"}, + {file = "PyMuPDF-1.23.21-cp311-none-win32.whl", hash = "sha256:2ae10b29d1a4dc0508ab4a8cff0f4746ec0a539a18520a85d7b45a2293fdf0b2"}, + {file = "PyMuPDF-1.23.21-cp311-none-win_amd64.whl", hash = "sha256:05695ee414b5e21a5da62050fe565c1fc047850e23ebde93c8ff6198a069f4b7"}, + {file = "PyMuPDF-1.23.21-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:e4c3b4b71357095be83ba101a09fc4755067140b6a19825cda0263c956eaa8bc"}, + {file = "PyMuPDF-1.23.21-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:5b39a0b278b35e0383757963fd7079ccbbd9544dcd0ef63157f45f4a223b2d35"}, + {file = "PyMuPDF-1.23.21-cp312-none-manylinux2014_aarch64.whl", hash = "sha256:2e237eb0b1ef3c1f6526cca5f69f9d907d76a8822da5e33e673a0cf3d3e17773"}, + {file = "PyMuPDF-1.23.21-cp312-none-manylinux2014_x86_64.whl", hash = "sha256:654ae0c2461af7c07beb73eb9d3814bc27de5e6dae4859fb1f565c46ddce012d"}, + {file = "PyMuPDF-1.23.21-cp312-none-win32.whl", hash = "sha256:01f550922196082dd571e9a831a0d69b5b2c2493636d9a69dc6bcb0dca122198"}, + {file = "PyMuPDF-1.23.21-cp312-none-win_amd64.whl", hash = "sha256:e1e65862414aee6f24a6cb83498f6de53544d56b18e948ccde41bd5f7743a554"}, + {file = "PyMuPDF-1.23.21-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:440abbbf8da20a2a9d516a1cbd92e416c18e415d941ea935471e9019a7717401"}, + {file = "PyMuPDF-1.23.21-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:31fe84ec377d37d940e1780936b9441ee1922b72a5e311e637f923bfbc38eaf7"}, + {file = "PyMuPDF-1.23.21-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:9b624b782a9cf38068048cde973d662f887ddb4c7de49e259797f5c6ffa84f0c"}, + {file = "PyMuPDF-1.23.21-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:e8141e6a01254b8b048b45eef3b87b826f4397110357d478262816487d219651"}, + {file = "PyMuPDF-1.23.21-cp38-none-win32.whl", hash = "sha256:130ad0c7b710060197b1e7dfdf3b64dbc2a07cc170a7dbcaf7d9b06ea861d6d1"}, + {file = "PyMuPDF-1.23.21-cp38-none-win_amd64.whl", hash = "sha256:1a0c30294d975efc4d31f23fae67ab6439ee215728d87be91a05e8b500abeabe"}, + {file = "PyMuPDF-1.23.21-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:fd3e6d49cad384f2ad2bd9a00e3e4fcdf09155e84fd7cf26bc1cec04eddfe67a"}, + {file = "PyMuPDF-1.23.21-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:cf8f7fa728c1942724105b08fe2b9cf711168b8ecf3aa883528633486f43456d"}, + {file = "PyMuPDF-1.23.21-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:1a977217a0f5dffb9ba422e547abbcffad7f3c62f3b6e488fec7ad1a74cc8d50"}, + {file = "PyMuPDF-1.23.21-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:033f80485b336ffd2577f4b99a1b6bd60567b2d1722288e88376b995f26c2994"}, + {file = "PyMuPDF-1.23.21-cp39-none-win32.whl", hash = "sha256:222737457b9c003b4aebd06a9d7c633115de6f64700a3b4cab3eb3ed72243ae8"}, + {file = "PyMuPDF-1.23.21-cp39-none-win_amd64.whl", hash = "sha256:46cd9a3acee024df0f3e9ec93b6ea2744b4927da2be3026a185c899f52d4147c"}, + {file = "PyMuPDF-1.23.21.tar.gz", hash = "sha256:79539ff09c5b7f8091bea3a9d48cd2490c1a14a3733589f280a4f48c51582c4c"}, ] [package.dependencies] @@ -3424,6 +3552,17 @@ files = [ {file = "PyMuPDFb-1.23.9-py3-none-win_amd64.whl", hash = "sha256:6a2a631fbd03330347b1ecf53f5534c4f7375b44e60b5ad8f36c5c96d4e6ec35"}, ] +[[package]] +name = "pynvml" +version = "11.5.0" +description = "Python Bindings for the NVIDIA Management Library" +optional = true +python-versions = ">=3.6" +files = [ + {file = "pynvml-11.5.0-py3-none-any.whl", hash = "sha256:5cce014ac01b098d08f06178f86c37be409b80b2e903a5a03ce15eed60f55e25"}, + {file = "pynvml-11.5.0.tar.gz", hash = "sha256:d027b21b95b1088b9fc278117f9f61b7c67f8e33a787e9f83f735f0f71ac32d0"}, +] + [[package]] name = "pyparsing" version = "3.1.1" @@ -3488,29 +3627,51 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "python-jose" +version = "3.3.0" +description = "JOSE implementation in Python" +optional = true +python-versions = "*" +files = [ + {file = "python-jose-3.3.0.tar.gz", hash = "sha256:55779b5e6ad599c6336191246e95eb2293a9ddebd555f796a65f838f07e5d78a"}, + {file = "python_jose-3.3.0-py2.py3-none-any.whl", hash = "sha256:9b1376b023f8b298536eedd47ae1089bcdb848f1535ab30555cd92002d78923a"}, +] + +[package.dependencies] +cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"cryptography\""} +ecdsa = "!=0.15" +pyasn1 = "*" +rsa = "*" + +[package.extras] +cryptography = ["cryptography (>=3.4.0)"] +pycrypto = ["pyasn1", "pycrypto (>=2.6.0,<2.7.0)"] +pycryptodome = ["pyasn1", "pycryptodome (>=3.3.1,<4.0.0)"] + [[package]] name = "python-multipart" -version = "0.0.6" +version = "0.0.7" description = "A streaming multipart parser for Python" optional = true python-versions = ">=3.7" files = [ - {file = "python_multipart-0.0.6-py3-none-any.whl", hash = "sha256:ee698bab5ef148b0a760751c261902cd096e57e10558e11aca17646b74ee1c18"}, - {file = "python_multipart-0.0.6.tar.gz", hash = "sha256:e9925a80bb668529f1b67c7fdb0a5dacdd7cbfc6fb0bff3ea443fe22bdd62132"}, + {file = "python_multipart-0.0.7-py3-none-any.whl", hash = "sha256:b1fef9a53b74c795e2347daac8c54b252d9e0df9c619712691c1cc8021bd3c49"}, + {file = "python_multipart-0.0.7.tar.gz", hash = "sha256:288a6c39b06596c1b988bb6794c6fbc80e6c369e35e5062637df256bee0c9af9"}, ] [package.extras] -dev = ["atomicwrites (==1.2.1)", "attrs (==19.2.0)", "coverage (==6.5.0)", "hatch", "invoke (==1.7.3)", "more-itertools (==4.3.0)", "pbr (==4.3.0)", "pluggy (==1.0.0)", "py (==1.11.0)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-timeout (==2.1.0)", "pyyaml (==5.1)"] +dev = ["atomicwrites (==1.2.1)", "attrs (==19.2.0)", "coverage (==6.5.0)", "hatch", "invoke (==2.2.0)", "more-itertools (==4.3.0)", "pbr (==4.3.0)", "pluggy (==1.0.0)", "py (==1.11.0)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-timeout (==2.1.0)", "pyyaml (==5.1)"] [[package]] name = "pytz" -version = "2023.4" +version = "2024.1" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" files = [ - {file = "pytz-2023.4-py2.py3-none-any.whl", hash = "sha256:f90ef520d95e7c46951105338d918664ebfd6f1d995bd7d153127ce90efafa6a"}, - {file = "pytz-2023.4.tar.gz", hash = "sha256:31d4583c4ed539cd037956140d695e42c033a19e984bfce9964a3f7d59bc2b40"}, + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, ] [[package]] @@ -3587,6 +3748,16 @@ files = [ [package.dependencies] pyyaml = "*" +[[package]] +name = "quantile-python" +version = "1.1" +description = "Python Implementation of Graham Cormode and S. Muthukrishnan's Effective Computation of Biased Quantiles over Data Streams in ICDE'05" +optional = true +python-versions = "*" +files = [ + {file = "quantile-python-1.1.tar.gz", hash = "sha256:558629e88c497ef3b9b1081349c1ae6a61b53590e317724298ff54c674db7969"}, +] + [[package]] name = "redis" version = "4.6.0" @@ -3900,23 +4071,22 @@ pyasn1 = ">=0.1.3" [[package]] name = "s3fs" -version = "2023.12.2" +version = "0.6.0" description = "Convenient Filesystem interface over S3" optional = true -python-versions = ">= 3.8" +python-versions = ">= 3.7" files = [ - {file = "s3fs-2023.12.2-py3-none-any.whl", hash = "sha256:0d5a99039665f30b2dbee5495de3b299a022d51b3195a9440f5df47c2621b777"}, - {file = "s3fs-2023.12.2.tar.gz", hash = "sha256:b5ec07062481bbb45cb061b31984c7188d106e292c27033039e024e4ba5740dc"}, + {file = "s3fs-0.6.0-py3-none-any.whl", hash = "sha256:296a7e2c69f6f5414221a7688245c25e0c6d36eebc52cdf77fdaff77b10c7dd9"}, + {file = "s3fs-0.6.0.tar.gz", hash = "sha256:69a1226359b46137676d94e328ee26fb327f40199ce5b19d38bc276d4e0029ac"}, ] [package.dependencies] -aiobotocore = ">=2.5.4,<3.0.0" -aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" -fsspec = "2023.12.2" +aiobotocore = ">=1.0.1" +fsspec = ">=0.8.0" [package.extras] -awscli = ["aiobotocore[awscli] (>=2.5.4,<3.0.0)"] -boto3 = ["aiobotocore[boto3] (>=2.5.4,<3.0.0)"] +awscli = ["aiobotocore[awscli]"] +boto3 = ["aiobotocore[boto3]"] [[package]] name = "safetensors" @@ -4382,30 +4552,30 @@ examples = ["fastapi"] [[package]] name = "starlette" -version = "0.35.1" +version = "0.36.3" description = "The little ASGI library that shines." optional = true python-versions = ">=3.8" files = [ - {file = "starlette-0.35.1-py3-none-any.whl", hash = "sha256:50bbbda9baa098e361f398fda0928062abbaf1f54f4fadcbe17c092a01eb9a25"}, - {file = "starlette-0.35.1.tar.gz", hash = "sha256:3e2639dac3520e4f58734ed22553f950d3f3cb1001cd2eaac4d57e8cdc5f66bc"}, + {file = "starlette-0.36.3-py3-none-any.whl", hash = "sha256:13d429aa93a61dc40bf503e8c801db1f1bca3dc706b10ef2434a36123568f044"}, + {file = "starlette-0.36.3.tar.gz", hash = "sha256:90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080"}, ] [package.dependencies] anyio = ">=3.4.0,<5" [package.extras] -full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] +full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] [[package]] name = "streamlit" -version = "1.30.0" +version = "1.31.0" description = "A faster way to build and share data apps" optional = true python-versions = ">=3.8, !=3.9.7" files = [ - {file = "streamlit-1.30.0-py2.py3-none-any.whl", hash = "sha256:536494a4edfe9b66ed70c437176cfd6c7e36b1d99d0587b0be64245fa89c241b"}, - {file = "streamlit-1.30.0.tar.gz", hash = "sha256:90333915d9df8ce3b06de31b8a5bbab51e8cf0982dc6c32da9d6b1f2b4a9fa78"}, + {file = "streamlit-1.31.0-py2.py3-none-any.whl", hash = "sha256:4d95c4f5d6881f7adebaec14997fa7024bb38853412d1bba9588074d585563f9"}, + {file = "streamlit-1.31.0.tar.gz", hash = "sha256:40d71944e30394612481f80a8bc09e7de40d33b7a472989807467a5299e342ca"}, ] [package.dependencies] @@ -4420,7 +4590,7 @@ packaging = ">=16.8,<24" pandas = ">=1.3.0,<3" pillow = ">=7.1.0,<11" protobuf = ">=3.20,<5" -pyarrow = ">=6.0" +pyarrow = ">=7.0" pydeck = ">=0.8.0b4,<1" python-dateutil = ">=2.7.3,<3" requests = ">=2.27,<3" @@ -4713,31 +4883,36 @@ files = [ [[package]] name = "torch" -version = "2.1.2" +version = "2.2.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = true python-versions = ">=3.8.0" files = [ - {file = "torch-2.1.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:3a871edd6c02dae77ad810335c0833391c1a4ce49af21ea8cf0f6a5d2096eea8"}, - {file = "torch-2.1.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:bef6996c27d8f6e92ea4e13a772d89611da0e103b48790de78131e308cf73076"}, - {file = "torch-2.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:0e13034fd5fb323cbbc29e56d0637a3791e50dd589616f40c79adfa36a5a35a1"}, - {file = "torch-2.1.2-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:d9b535cad0df3d13997dbe8bd68ac33e0e3ae5377639c9881948e40794a61403"}, - {file = "torch-2.1.2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:f9a55d55af02826ebfbadf4e9b682f0f27766bc33df8236b48d28d705587868f"}, - {file = "torch-2.1.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:a6ebbe517097ef289cc7952783588c72de071d4b15ce0f8b285093f0916b1162"}, - {file = "torch-2.1.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:8f32ce591616a30304f37a7d5ea80b69ca9e1b94bba7f308184bf616fdaea155"}, - {file = "torch-2.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:e0ee6cf90c8970e05760f898d58f9ac65821c37ffe8b04269ec787aa70962b69"}, - {file = "torch-2.1.2-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:76d37967c31c99548ad2c4d3f2cf191db48476f2e69b35a0937137116da356a1"}, - {file = "torch-2.1.2-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:e2d83f07b4aac983453ea5bf8f9aa9dacf2278a8d31247f5d9037f37befc60e4"}, - {file = "torch-2.1.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:f41fe0c7ecbf903a568c73486139a75cfab287a0f6c17ed0698fdea7a1e8641d"}, - {file = "torch-2.1.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:e3225f47d50bb66f756fe9196a768055d1c26b02154eb1f770ce47a2578d3aa7"}, - {file = "torch-2.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:33d59cd03cb60106857f6c26b36457793637512998666ee3ce17311f217afe2b"}, - {file = "torch-2.1.2-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:8e221deccd0def6c2badff6be403e0c53491805ed9915e2c029adbcdb87ab6b5"}, - {file = "torch-2.1.2-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:05b18594f60a911a0c4f023f38a8bda77131fba5fd741bda626e97dcf5a3dd0a"}, - {file = "torch-2.1.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:9ca96253b761e9aaf8e06fb30a66ee301aecbf15bb5a303097de1969077620b6"}, - {file = "torch-2.1.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:d93ba70f67b08c2ae5598ee711cbc546a1bc8102cef938904b8c85c2089a51a0"}, - {file = "torch-2.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:255b50bc0608db177e6a3cc118961d77de7e5105f07816585fa6f191f33a9ff3"}, - {file = "torch-2.1.2-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:6984cd5057c0c977b3c9757254e989d3f1124f4ce9d07caa6cb637783c71d42a"}, - {file = "torch-2.1.2-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:bc195d7927feabc0eb7c110e457c955ed2ab616f3c7c28439dd4188cf589699f"}, + {file = "torch-2.2.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:d366158d6503a3447e67f8c0ad1328d54e6c181d88572d688a625fac61b13a97"}, + {file = "torch-2.2.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:707f2f80402981e9f90d0038d7d481678586251e6642a7a6ef67fc93511cb446"}, + {file = "torch-2.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:15c8f0a105c66b28496092fca1520346082e734095f8eaf47b5786bac24b8a31"}, + {file = "torch-2.2.0-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:0ca4df4b728515ad009b79f5107b00bcb2c63dc202d991412b9eb3b6a4f24349"}, + {file = "torch-2.2.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:3d3eea2d5969b9a1c9401429ca79efc668120314d443d3463edc3289d7f003c7"}, + {file = "torch-2.2.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0d1c580e379c0d48f0f0a08ea28d8e373295aa254de4f9ad0631f9ed8bc04c24"}, + {file = "torch-2.2.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:9328e3c1ce628a281d2707526b4d1080eae7c4afab4f81cea75bde1f9441dc78"}, + {file = "torch-2.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:03c8e660907ac1b8ee07f6d929c4e15cd95be2fb764368799cca02c725a212b8"}, + {file = "torch-2.2.0-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:da0cefe7f84ece3e3b56c11c773b59d1cb2c0fd83ddf6b5f7f1fd1a987b15c3e"}, + {file = "torch-2.2.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:f81d23227034221a4a4ff8ef24cc6cec7901edd98d9e64e32822778ff01be85e"}, + {file = "torch-2.2.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:dcbfb2192ac41ca93c756ebe9e2af29df0a4c14ee0e7a0dd78f82c67a63d91d4"}, + {file = "torch-2.2.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:9eeb42971619e24392c9088b5b6d387d896e267889d41d267b1fec334f5227c5"}, + {file = "torch-2.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:c718b2ca69a6cac28baa36d86d8c0ec708b102cebd1ceb1b6488e404cd9be1d1"}, + {file = "torch-2.2.0-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:f11d18fceb4f9ecb1ac680dde7c463c120ed29056225d75469c19637e9f98d12"}, + {file = "torch-2.2.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:ee1da852bfd4a7e674135a446d6074c2da7194c1b08549e31eae0b3138c6b4d2"}, + {file = "torch-2.2.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:0d819399819d0862268ac531cf12a501c253007df4f9e6709ede8a0148f1a7b8"}, + {file = "torch-2.2.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:08f53ccc38c49d839bc703ea1b20769cc8a429e0c4b20b56921a9f64949bf325"}, + {file = "torch-2.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:93bffe3779965a71dab25fc29787538c37c5d54298fd2f2369e372b6fb137d41"}, + {file = "torch-2.2.0-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:c17ec323da778efe8dad49d8fb534381479ca37af1bfc58efdbb8607a9d263a3"}, + {file = "torch-2.2.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:c02685118008834e878f676f81eab3a952b7936fa31f474ef8a5ff4b5c78b36d"}, + {file = "torch-2.2.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:d9f39d6f53cec240a0e3baa82cb697593340f9d4554cee6d3d6ca07925c2fac0"}, + {file = "torch-2.2.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:51770c065206250dc1222ea7c0eff3f88ab317d3e931cca2aee461b85fbc2472"}, + {file = "torch-2.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:008e4c6ad703de55af760c73bf937ecdd61a109f9b08f2bbb9c17e7c7017f194"}, + {file = "torch-2.2.0-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:de8680472dd14e316f42ceef2a18a301461a9058cd6e99a1f1b20f78f11412f1"}, + {file = "torch-2.2.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:99e1dcecb488e3fd25bcaac56e48cdb3539842904bdc8588b0b255fde03a254c"}, ] [package.dependencies] @@ -4754,15 +4929,15 @@ nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linu nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -nvidia-nccl-cu12 = {version = "2.18.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +nvidia-nccl-cu12 = {version = "2.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} sympy = "*" -triton = {version = "2.1.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} -typing-extensions = "*" +triton = {version = "2.2.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""} +typing-extensions = ">=4.8.0" [package.extras] -dynamo = ["jinja2"] opt-einsum = ["opt-einsum (>=3.3)"] +optree = ["optree (>=0.9.1)"] [[package]] name = "tornado" @@ -4874,28 +5049,26 @@ vision = ["Pillow (>=10.0.1,<=15.0)"] [[package]] name = "triton" -version = "2.1.0" +version = "2.2.0" description = "A language and compiler for custom Deep Learning operations" optional = true python-versions = "*" files = [ - {file = "triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:66439923a30d5d48399b08a9eae10370f6c261a5ec864a64983bae63152d39d7"}, - {file = "triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:919b06453f0033ea52c13eaf7833de0e57db3178d23d4e04f9fc71c4f2c32bf8"}, - {file = "triton-2.1.0-0-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae4bb8a91de790e1866405211c4d618379781188f40d5c4c399766914e84cd94"}, - {file = "triton-2.1.0-0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39f6fb6bdccb3e98f3152e3fbea724f1aeae7d749412bbb1fa9c441d474eba26"}, - {file = "triton-2.1.0-0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21544e522c02005a626c8ad63d39bdff2f31d41069592919ef281e964ed26446"}, - {file = "triton-2.1.0-0-pp37-pypy37_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:143582ca31dd89cd982bd3bf53666bab1c7527d41e185f9e3d8a3051ce1b663b"}, - {file = "triton-2.1.0-0-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82fc5aeeedf6e36be4e4530cbdcba81a09d65c18e02f52dc298696d45721f3bd"}, - {file = "triton-2.1.0-0-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:81a96d110a738ff63339fc892ded095b31bd0d205e3aace262af8400d40b6fa8"}, + {file = "triton-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2294514340cfe4e8f4f9e5c66c702744c4a117d25e618bd08469d0bfed1e2e5"}, + {file = "triton-2.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da58a152bddb62cafa9a857dd2bc1f886dbf9f9c90a2b5da82157cd2b34392b0"}, + {file = "triton-2.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af58716e721460a61886668b205963dc4d1e4ac20508cc3f623aef0d70283d5"}, + {file = "triton-2.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8fe46d3ab94a8103e291bd44c741cc294b91d1d81c1a2888254cbf7ff846dab"}, + {file = "triton-2.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8ce26093e539d727e7cf6f6f0d932b1ab0574dc02567e684377630d86723ace"}, + {file = "triton-2.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:227cc6f357c5efcb357f3867ac2a8e7ecea2298cd4606a8ba1e931d1d5a947df"}, ] [package.dependencies] filelock = "*" [package.extras] -build = ["cmake (>=3.18)", "lit"] -tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)"] -tutorials = ["matplotlib", "pandas", "tabulate"] +build = ["cmake (>=3.20)", "lit"] +tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"] +tutorials = ["matplotlib", "pandas", "tabulate", "torch"] [[package]] name = "typing-extensions" @@ -5346,44 +5519,50 @@ files = [ [[package]] name = "xinference" -version = "0.6.5" +version = "0.8.4" description = "Model Serving Made Easy" optional = true python-versions = "*" files = [ - {file = "xinference-0.6.5-py3-none-any.whl", hash = "sha256:252827d24712d72cd0ffa3c97506b3b98cc770907ac8f2095256b4672b2012c9"}, - {file = "xinference-0.6.5.tar.gz", hash = "sha256:fcc78bf908a14c57c6568138f542e1a861f8f216f36ef44a8e7b088084d947b0"}, + {file = "xinference-0.8.4-py3-none-any.whl", hash = "sha256:8c0eca43eba020d04cd9badec80259d3de2eabebf04390b67b2b2f5d5460042a"}, + {file = "xinference-0.8.4.tar.gz", hash = "sha256:cc3fb6f9f548c5796f925049544271a9838bdcc1034ad3df415888d3d2b884e5"}, ] [package.dependencies] +aioprometheus = {version = ">=23.12.0", extras = ["starlette"]} +async-timeout = "*" click = "*" fastapi = "*" -fsspec = "*" -gradio = ">=3.39.0" -huggingface-hub = ">=0.14.1,<1.0" -modelscope = "*" +fsspec = ">=2023.1.0,<=2023.10.0" +gradio = ">=3.39.0,<4.0.0" +huggingface-hub = ">=0.19.4,<1.0" +modelscope = ">=1.10.0" openai = ">1" +passlib = {version = "*", extras = ["bcrypt"]} +pillow = "*" pydantic = "<2" +pynvml = "*" +python-jose = {version = "*", extras = ["cryptography"]} requests = "*" s3fs = "*" -sse-starlette = "*" +sse-starlette = ">=1.6.5" tabulate = "*" torch = "*" tqdm = ">=4.27" typing-extensions = "*" uvicorn = "*" -xoscar = ">=0.1.2" +xoscar = ">=0.2.1" [package.extras] -all = ["accelerate (>=0.20.3)", "auto-gptq", "bitsandbytes", "chatglm-cpp (>=0.3.0)", "controlnet-aux", "ctransformers", "diffusers", "einops", "llama-cpp-python (>=0.2.0)", "optimum", "orjson", "protobuf", "sentence-transformers", "sentencepiece", "tiktoken", "torch", "transformers (>=4.34.1)", "transformers-stream-generator", "vllm"] -benchmark = ["psutil", "pynvml"] -dev = ["black", "cython (>=0.29)", "flake8 (>=3.8.0)", "ipython (>=6.5.0)", "jieba (>=0.42.0)", "langchain", "openai (>1)", "opencv-python", "orjson", "pillow", "pydata-sphinx-theme (>=0.3.0)", "pytest (>=3.5.0)", "pytest-asyncio (>=0.14.0)", "pytest-cov (>=2.5.0)", "pytest-forked (>=1.0)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=1.2.0)", "sphinx (>=3.0.0,<5.0.0)", "sphinx-intl (>=0.9.9)", "sphinx-tabs"] -doc = ["ipython (>=6.5.0)", "pydata-sphinx-theme (>=0.3.0)", "sphinx (>=3.0.0,<5.0.0)", "sphinx-intl (>=0.9.9)", "sphinx-tabs"] -embedding = ["sentence-transformers"] -ggml = ["chatglm-cpp (>=0.3.0)", "ctransformers", "llama-cpp-python (>=0.2.0)"] +all = ["accelerate (>=0.20.3)", "auto-gptq", "bitsandbytes", "chatglm-cpp (>=0.3.0)", "controlnet-aux", "diffusers", "einops", "llama-cpp-python (>=0.2.25)", "optimum", "orjson", "protobuf", "sentence-transformers (>=2.3.1)", "sentencepiece", "tiktoken", "torch", "transformers (>=4.34.1)", "transformers-stream-generator", "vllm (>=0.2.6)"] +benchmark = ["psutil"] +dev = ["black", "cython (>=0.29)", "flake8 (>=3.8.0)", "ipython (>=6.5.0)", "jieba (>=0.42.0)", "langchain", "openai (>1)", "opencv-python", "orjson", "pydata-sphinx-theme (>=0.3.0)", "pytest (>=3.5.0)", "pytest-asyncio (>=0.14.0)", "pytest-cov (>=2.5.0)", "pytest-forked (>=1.0)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=1.2.0)", "sphinx (>=3.0.0)", "sphinx-design", "sphinx-intl (>=0.9.9)", "sphinx-tabs"] +doc = ["ipython (>=6.5.0)", "prometheus-client", "pydata-sphinx-theme (>=0.3.0)", "sphinx (>=3.0.0)", "sphinx-design", "sphinx-intl (>=0.9.9)", "sphinx-tabs"] +embedding = ["sentence-transformers (>=2.3.1)"] +ggml = ["chatglm-cpp (>=0.3.0)", "ctransformers", "llama-cpp-python (>=0.2.25)"] image = ["controlnet-aux", "diffusers"] transformers = ["accelerate (>=0.20.3)", "auto-gptq", "bitsandbytes", "einops", "optimum", "protobuf", "sentencepiece", "tiktoken", "torch", "transformers (>=4.34.1)", "transformers-stream-generator"] -vllm = ["vllm"] +vllm = ["vllm (>=0.2.6)"] [[package]] name = "xoscar" @@ -5690,4 +5869,4 @@ xinference = ["botocore", "xinference"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.12" -content-hash = "4d3d3ef8f71c665c7468a0940f55e7cb46e938d3e59323f657238afd273a124c" +content-hash = "fed4c0b8eb1a2809eb4485fcb2272bac288d40be2fb89e0c35403d36b6ff65b8" diff --git a/pyproject.toml b/pyproject.toml index e408adc9..00c71c87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ transformers = "^4.30.2" streamlit = { version = "^1.23.1", optional = true } gTTS = { version = "^2.3.2", optional = true } botocore = { version = "^1.33.9", optional = true } -xinference = { version = "^0.6.5", optional = true } +xinference = { version = "^0.8.4", optional = true } rsa = "^4.9" cryptography = "^41.0.7" neo4j-utils = "^0.0.7" From ee1951757f6db5e44ac9bc93f977a51a85dcd65a Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Mon, 5 Feb 2024 02:29:13 +0100 Subject: [PATCH 29/32] adjust quantisations for llama gguf --- benchmark/conftest.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/benchmark/conftest.py b/benchmark/conftest.py index dc803fc7..c182cce5 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -33,17 +33,15 @@ "model_format": "ggufv2", "quantization": [ "Q2_K", - # "Q3_K_L", - "Q3_K_M", # "Q3_K_S", - "Q4_0", - "Q4_1", + "Q3_K_M", + # "Q3_K_L", + # "Q4_0", + # "Q4_K_S", "Q4_K_M", - "Q4_K_S", - "Q5_0", - # "Q5_1", - "Q5_K_M", + # "Q5_0", # "Q5_K_S", + "Q5_K_M", "Q6_K", "Q8_0", ], @@ -55,12 +53,12 @@ "model_format": "ggufv2", "quantization": [ "Q2_K", - # "Q3_K_M", - "Q4_0", + "Q3_K_M", + # "Q4_0", "Q4_K_M", "Q5_0", # "Q5_K_M", - # "Q6_K", + "Q6_K", "Q8_0", ], }, From 066db58114b8929d18ab4d415582852907715b78 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Mon, 5 Feb 2024 10:50:03 +0100 Subject: [PATCH 30/32] run (some) llama 70B --- .../results/end_to_end_query_generation.csv | 12 +++++ benchmark/results/entity_selection.csv | 11 +++++ ...explicit_relevance_of_single_fragments.csv | 36 +++++++++++++++ ...plicit_relevance_of_multiple_fragments.csv | 12 +++++ .../end_to_end_query_generation.csv | 12 +++-- .../entity_selection.csv | 14 ++++-- ...explicit_relevance_of_single_fragments.csv | 18 +++++--- ...plicit_relevance_of_multiple_fragments.csv | 24 ++++++---- .../overview-aggregated.csv | 46 +++++++++++-------- .../preprocessed_for_frontend/overview.csv | 8 +++- .../property_exists.csv | 18 +++++--- .../property_selection.csv | 16 +++++-- .../query_generation.csv | 22 +++++---- .../relationship_selection.csv | 26 +++++++---- benchmark/results/property_exists.csv | 12 +++++ benchmark/results/property_selection.csv | 12 +++++ benchmark/results/query_generation.csv | 12 +++++ benchmark/results/relationship_selection.csv | 12 +++++ 18 files changed, 251 insertions(+), 72 deletions(-) diff --git a/benchmark/results/end_to_end_query_generation.csv b/benchmark/results/end_to_end_query_generation.csv index 975af2b2..88c0a474 100644 --- a/benchmark/results/end_to_end_query_generation.csv +++ b/benchmark/results/end_to_end_query_generation.csv @@ -23,6 +23,14 @@ llama-2-chat:13:ggufv2:Q6_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q6_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggufv2:Q8_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q8_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q3_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q3_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q4_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q4_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q5_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q5_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q3_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f @@ -45,11 +53,15 @@ llama-2-chat:7:ggufv2:Q8_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q8_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,0.0/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,0.0/8,2,e447d738f5e035cde60d624eadb79fec diff --git a/benchmark/results/entity_selection.csv b/benchmark/results/entity_selection.csv index 4de953b5..1bdaa6ea 100644 --- a/benchmark/results/entity_selection.csv +++ b/benchmark/results/entity_selection.csv @@ -23,7 +23,14 @@ llama-2-chat:13:ggufv2:Q6_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggufv2:Q8_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q2_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:70:ggufv2:Q2_K,single_word,0.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q3_K_M,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q3_K_M,single_word,0.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q4_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q4_K_M,single_word,1.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q5_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q5_K_M,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q2_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q2_K,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q3_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f @@ -46,11 +53,15 @@ llama-2-chat:7:ggufv2:Q8_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q8_0,single_word,2.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,0.0/2,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec diff --git a/benchmark/results/explicit_relevance_of_single_fragments.csv b/benchmark/results/explicit_relevance_of_single_fragments.csv index 79b31719..c1907f7b 100644 --- a/benchmark/results/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/explicit_relevance_of_single_fragments.csv @@ -71,6 +71,30 @@ llama-2-chat:13:ggufv2:Q8_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1 llama-2-chat:13:ggufv2:Q8_0,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 llama-2-chat:13:ggufv2:Q8_0,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 llama-2-chat:13:ggufv2:Q8_0,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:70:ggufv2:Q2_K,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:70:ggufv2:Q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:70:ggufv2:Q2_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:70:ggufv2:Q2_K,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:70:ggufv2:Q2_K,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:70:ggufv2:Q2_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:70:ggufv2:Q3_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:70:ggufv2:Q3_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:70:ggufv2:Q3_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:70:ggufv2:Q3_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:70:ggufv2:Q3_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:70:ggufv2:Q3_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:70:ggufv2:Q4_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:70:ggufv2:Q4_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:70:ggufv2:Q4_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:70:ggufv2:Q4_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:70:ggufv2:Q4_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:70:ggufv2:Q4_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +llama-2-chat:70:ggufv2:Q5_K_M,explicit_evaluation_no,1.0/1,2,d15e0094569f8df146459b50a781fc55 +llama-2-chat:70:ggufv2:Q5_K_M,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 +llama-2-chat:70:ggufv2:Q5_K_M,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +llama-2-chat:70:ggufv2:Q5_K_M,explicit_relevance_no_repeat_instruction,1.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +llama-2-chat:70:ggufv2:Q5_K_M,explicit_relevance_no_simple,1.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +llama-2-chat:70:ggufv2:Q5_K_M,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 llama-2-chat:7:ggufv2:Q2_K,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 llama-2-chat:7:ggufv2:Q2_K,explicit_evaluation_yes,1.0/1,2,1773602eac8037fbea015069d5f15cd2 llama-2-chat:7:ggufv2:Q2_K,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 @@ -137,6 +161,12 @@ mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_relevance_no_more_explicit,0.0/1 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_relevance_no_repeat_instruction,0.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_relevance_no_simple,0.0/1,2,bf26b8241de3470cd9a406aea0992fb2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,explicit_relevance_yes,1.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,explicit_evaluation_yes,0.0/1,2,1773602eac8037fbea015069d5f15cd2 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,explicit_relevance_no_more_explicit,0.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,explicit_relevance_no_repeat_instruction,0.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,explicit_relevance_no_simple,0.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,explicit_evaluation_yes,0.0/1,2,1773602eac8037fbea015069d5f15cd2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,explicit_relevance_no_more_explicit,1.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 @@ -155,6 +185,12 @@ mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,explicit_relevance_no_more_explicit,0.0/1 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,explicit_relevance_no_repeat_instruction,0.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,explicit_relevance_no_simple,0.5/1,2,bf26b8241de3470cd9a406aea0992fb2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,explicit_evaluation_no,0.0/1,2,d15e0094569f8df146459b50a781fc55 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,explicit_evaluation_yes,0.0/1,2,1773602eac8037fbea015069d5f15cd2 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,explicit_relevance_no_more_explicit,0.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,explicit_relevance_no_repeat_instruction,0.0/1,2,1ca6c04890597e4ece0eb8ad632f3f75 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,explicit_relevance_no_simple,0.0/1,2,bf26b8241de3470cd9a406aea0992fb2 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,explicit_relevance_yes,0.0/1,2,1f7a20371c9b65790b9b8e8be116b246 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,explicit_evaluation_no,0.5/1,2,d15e0094569f8df146459b50a781fc55 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,explicit_evaluation_yes,0.0/1,2,1773602eac8037fbea015069d5f15cd2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,explicit_relevance_no_more_explicit,0.0/1,2,8dddcfc1314f6f671d59a3a90c95e3c0 diff --git a/benchmark/results/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/implicit_relevance_of_multiple_fragments.csv index b2358bac..c4c7eb74 100644 --- a/benchmark/results/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/implicit_relevance_of_multiple_fragments.csv @@ -23,6 +23,14 @@ llama-2-chat:13:ggufv2:Q6_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308be llama-2-chat:13:ggufv2:Q6_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 llama-2-chat:13:ggufv2:Q8_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 llama-2-chat:13:ggufv2:Q8_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:70:ggufv2:Q2_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:70:ggufv2:Q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:70:ggufv2:Q3_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:70:ggufv2:Q3_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:70:ggufv2:Q4_K_M,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:70:ggufv2:Q4_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +llama-2-chat:70:ggufv2:Q5_K_M,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 +llama-2-chat:70:ggufv2:Q5_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 llama-2-chat:7:ggufv2:Q2_K,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 llama-2-chat:7:ggufv2:Q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 llama-2-chat:7:ggufv2:Q3_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 @@ -45,11 +53,15 @@ llama-2-chat:7:ggufv2:Q8_0,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308beb llama-2-chat:7:ggufv2:Q8_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,implicit_relevance_no,0.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,implicit_relevance_yes,0.5/1,2,f9d749647929fcb55321c614a3bf8d20 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,implicit_relevance_no,1.0/1,2,b24fb31fd761b0f3e308bebd70ce4277 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,implicit_relevance_no,0.5/1,2,b24fb31fd761b0f3e308bebd70ce4277 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,implicit_relevance_yes,1.0/1,2,f9d749647929fcb55321c614a3bf8d20 diff --git a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv index 4bb10cb5..9efe2c49 100644 --- a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv @@ -1,20 +1,26 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-4,11.5,16.0,0.71875,2 gpt-3.5-turbo,11.0,16.0,0.6875,2 -llama-2-chat:7:ggufv2:Q4_0,0.0,16.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,16.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,0.0,16.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,16.0,0.0,2 -llama-2-chat:7:ggufv2:Q8_0,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q3_K_M,0.0,16.0,0.0,2 llama-2-chat:7:ggufv2:Q6_K,0.0,16.0,0.0,2 llama-2-chat:7:ggufv2:Q5_K_M,0.0,16.0,0.0,2 llama-2-chat:7:ggufv2:Q5_0,0.0,16.0,0.0,2 llama-2-chat:7:ggufv2:Q4_K_S,0.0,16.0,0.0,2 llama-2-chat:7:ggufv2:Q4_K_M,0.0,16.0,0.0,2 llama-2-chat:7:ggufv2:Q4_1,0.0,16.0,0.0,2 -llama-2-chat:7:ggufv2:Q3_K_M,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_0,0.0,16.0,0.0,2 +llama-2-chat:7:ggufv2:Q8_0,0.0,16.0,0.0,2 llama-2-chat:7:ggufv2:Q2_K,0.0,16.0,0.0,2 +llama-2-chat:70:ggufv2:Q5_K_M,0.0,16.0,0.0,2 +llama-2-chat:70:ggufv2:Q4_K_M,0.0,16.0,0.0,2 +llama-2-chat:70:ggufv2:Q3_K_M,0.0,16.0,0.0,2 +llama-2-chat:70:ggufv2:Q2_K,0.0,16.0,0.0,2 llama-2-chat:13:ggufv2:Q8_0,0.0,16.0,0.0,2 llama-2-chat:13:ggufv2:Q6_K,0.0,16.0,0.0,2 llama-2-chat:13:ggufv2:Q5_K_M,0.0,16.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/entity_selection.csv b/benchmark/results/preprocessed_for_frontend/entity_selection.csv index 8cb1b2a8..16f2d979 100644 --- a/benchmark/results/preprocessed_for_frontend/entity_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/entity_selection.csv @@ -1,7 +1,8 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,4.0,4.0,1.0,2 gpt-4,4.0,4.0,1.0,2 -llama-2-chat:7:ggufv2:Q2_K,3.0,5.0,0.6,2 +llama-2-chat:70:ggufv2:Q5_K_M,4.0,5.0,0.8,2 +llama-2-chat:70:ggufv2:Q4_K_M,3.0,4.0,0.75,2 llama-2-chat:7:ggufv2:Q8_0,3.0,5.0,0.6,2 llama-2-chat:7:ggufv2:Q5_K_M,3.0,5.0,0.6,2 llama-2-chat:7:ggufv2:Q5_0,3.0,5.0,0.6,2 @@ -9,20 +10,25 @@ llama-2-chat:7:ggufv2:Q4_K_S,3.0,5.0,0.6,2 llama-2-chat:7:ggufv2:Q4_1,3.0,5.0,0.6,2 llama-2-chat:7:ggufv2:Q4_0,3.0,5.0,0.6,2 llama-2-chat:7:ggufv2:Q3_K_M,3.0,5.0,0.6,2 +llama-2-chat:7:ggufv2:Q2_K,3.0,5.0,0.6,2 llama-2-chat:7:ggufv2:Q4_K_M,2.5,5.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,5.0,0.4,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,2.0,5.0,0.4,2 llama-2-chat:13:ggufv2:Q4_0,1.0,5.0,0.2,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,1.0,5.0,0.2,2 llama-2-chat:7:ggufv2:Q6_K,1.0,5.0,0.2,2 llama-2-chat:13:ggufv2:Q3_K_M,0.0,5.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,0.0,5.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,5.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,5.0,0.0,2 -llama-2-chat:13:ggufv2:Q2_K,0.0,5.0,0.0,2 -llama-2-chat:13:ggufv2:Q6_K,0.0,5.0,0.0,2 -llama-2-chat:13:ggufv2:Q8_0,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q2_K,0.0,4.0,0.0,2 +llama-2-chat:70:ggufv2:Q2_K,0.0,4.0,0.0,2 +llama-2-chat:70:ggufv2:Q3_K_M,0.0,4.0,0.0,2 llama-2-chat:13:ggufv2:Q4_1,0.0,5.0,0.0,2 llama-2-chat:13:ggufv2:Q4_K_M,0.0,5.0,0.0,2 llama-2-chat:13:ggufv2:Q4_K_S,0.0,5.0,0.0,2 llama-2-chat:13:ggufv2:Q5_0,0.0,5.0,0.0,2 llama-2-chat:13:ggufv2:Q5_K_M,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q6_K,0.0,5.0,0.0,2 +llama-2-chat:13:ggufv2:Q8_0,0.0,5.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,5.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv index 73331d63..9f44ca4a 100644 --- a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv @@ -1,16 +1,17 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,6.0,6.0,1.0,2 -llama-2-chat:13:ggufv2:Q8_0,6.0,6.0,1.0,2 -llama-2-chat:7:ggufv2:Q8_0,6.0,6.0,1.0,2 -llama-2-chat:7:ggufv2:Q6_K,6.0,6.0,1.0,2 -llama-2-chat:7:ggufv2:Q5_K_M,6.0,6.0,1.0,2 -llama-2-chat:7:ggufv2:Q5_0,6.0,6.0,1.0,2 +llama-2-chat:70:ggufv2:Q2_K,6.0,6.0,1.0,2 llama-2-chat:7:ggufv2:Q4_K_S,6.0,6.0,1.0,2 llama-2-chat:7:ggufv2:Q4_K_M,6.0,6.0,1.0,2 llama-2-chat:7:ggufv2:Q4_1,6.0,6.0,1.0,2 llama-2-chat:7:ggufv2:Q4_0,6.0,6.0,1.0,2 -gpt-4,6.0,6.0,1.0,2 llama-2-chat:7:ggufv2:Q3_K_M,6.0,6.0,1.0,2 +gpt-4,6.0,6.0,1.0,2 +llama-2-chat:70:ggufv2:Q5_K_M,6.0,6.0,1.0,2 +llama-2-chat:70:ggufv2:Q4_K_M,6.0,6.0,1.0,2 +llama-2-chat:70:ggufv2:Q3_K_M,6.0,6.0,1.0,2 +llama-2-chat:13:ggufv2:Q8_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q5_K_M,6.0,6.0,1.0,2 llama-2-chat:13:ggufv2:Q6_K,6.0,6.0,1.0,2 llama-2-chat:13:ggufv2:Q5_K_M,6.0,6.0,1.0,2 llama-2-chat:13:ggufv2:Q5_0,6.0,6.0,1.0,2 @@ -18,6 +19,9 @@ llama-2-chat:13:ggufv2:Q4_K_S,6.0,6.0,1.0,2 llama-2-chat:13:ggufv2:Q4_K_M,6.0,6.0,1.0,2 llama-2-chat:13:ggufv2:Q4_1,6.0,6.0,1.0,2 llama-2-chat:13:ggufv2:Q4_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q6_K,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q8_0,6.0,6.0,1.0,2 +llama-2-chat:7:ggufv2:Q5_0,6.0,6.0,1.0,2 llama-2-chat:7:ggufv2:Q2_K,5.0,6.0,0.8333333333333334,2 llama-2-chat:13:ggufv2:Q3_K_M,5.0,6.0,0.8333333333333334,2 llama-2-chat:13:ggufv2:Q2_K,5.0,6.0,0.8333333333333334,2 @@ -26,3 +30,5 @@ mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,2.0,6.0,0.3333333333333333,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,1.0,6.0,0.16666666666666666,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.5,6.0,0.08333333333333333,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.5,6.0,0.08333333333333333,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,0.0,6.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv index b6da22e9..a25c00c6 100644 --- a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv @@ -1,28 +1,34 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,2.0,2.0,1.0,2 -gpt-4,2.0,2.0,1.0,2 +llama-2-chat:70:ggufv2:Q4_K_M,2.0,2.0,1.0,2 llama-2-chat:13:ggufv2:Q2_K,2.0,2.0,1.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,2.0,2.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,2.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.0,2.0,1.0,2 +gpt-4,2.0,2.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.5,2.0,0.75,2 llama-2-chat:7:ggufv2:Q5_K_M,1.5,2.0,0.75,2 +llama-2-chat:70:ggufv2:Q5_K_M,1.5,2.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1.5,2.0,0.75,2 llama-2-chat:13:ggufv2:Q4_K_S,1.5,2.0,0.75,2 -llama-2-chat:13:ggufv2:Q5_0,1.0,2.0,0.5,2 -llama-2-chat:7:ggufv2:Q5_0,1.0,2.0,0.5,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q8_0,1.0,2.0,0.5,2 +llama-2-chat:70:ggufv2:Q2_K,1.0,2.0,0.5,2 llama-2-chat:13:ggufv2:Q3_K_M,1.0,2.0,0.5,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.0,2.0,0.5,2 llama-2-chat:13:ggufv2:Q4_0,1.0,2.0,0.5,2 llama-2-chat:13:ggufv2:Q4_1,1.0,2.0,0.5,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q4_K_M,1.0,2.0,0.5,2 llama-2-chat:7:ggufv2:Q8_0,1.0,2.0,0.5,2 llama-2-chat:7:ggufv2:Q6_K,1.0,2.0,0.5,2 -llama-2-chat:13:ggufv2:Q4_K_M,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q5_0,1.0,2.0,0.5,2 llama-2-chat:7:ggufv2:Q4_K_S,1.0,2.0,0.5,2 -llama-2-chat:13:ggufv2:Q5_K_M,1.0,2.0,0.5,2 llama-2-chat:7:ggufv2:Q4_K_M,1.0,2.0,0.5,2 llama-2-chat:7:ggufv2:Q4_1,1.0,2.0,0.5,2 llama-2-chat:7:ggufv2:Q4_0,1.0,2.0,0.5,2 -llama-2-chat:7:ggufv2:Q2_K,1.0,2.0,0.5,2 -llama-2-chat:13:ggufv2:Q8_0,1.0,2.0,0.5,2 -llama-2-chat:13:ggufv2:Q6_K,1.0,2.0,0.5,2 llama-2-chat:7:ggufv2:Q3_K_M,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q5_0,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q5_K_M,1.0,2.0,0.5,2 +llama-2-chat:13:ggufv2:Q6_K,1.0,2.0,0.5,2 +llama-2-chat:70:ggufv2:Q3_K_M,1.0,2.0,0.5,2 +llama-2-chat:7:ggufv2:Q2_K,1.0,2.0,0.5,2 diff --git a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv index 0ca0cf15..1fe19785 100644 --- a/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv +++ b/benchmark/results/preprocessed_for_frontend/overview-aggregated.csv @@ -1,28 +1,34 @@ Model name,Size,Version,Quantisation,Mean,SD gpt-4,,,,0.9,0.12 gpt-3.5-turbo,,,,0.88,0.16 -llama-2-chat,7,ggufv2,q3_K_M,0.52,0.34 -llama-2-chat,7,ggufv2,q5_0,0.49,0.32 -llama-2-chat,7,ggufv2,q5_K_M,0.47,0.38 -llama-2-chat,13,ggufv2,q5_0,0.46,0.53 -llama-2-chat,7,ggufv2,q4_0,0.45,0.39 -llama-2-chat,7,ggufv2,q4_K_S,0.44,0.37 -llama-2-chat,13,ggufv2,q4_1,0.43,0.48 -llama-2-chat,7,ggufv2,q4_1,0.42,0.35 -llama-2-chat,13,ggufv2,q4_0,0.41,0.4 -llama-2-chat,7,ggufv2,q4_K_M,0.41,0.35 -llama-2-chat,7,ggufv2,q8_0,0.4,0.34 -llama-2-chat,13,ggufv2,q5_K_M,0.4,0.43 -llama-2-chat,13,ggufv2,q4_K_S,0.38,0.39 -llama-2-chat,13,ggufv2,q8_0,0.38,0.4 -llama-2-chat,13,ggufv2,q6_K,0.38,0.4 -llama-2-chat,13,ggufv2,q4_K_M,0.38,0.4 -llama-2-chat,7,ggufv2,q6_K,0.36,0.34 +llama-2-chat,70,ggufv2,Q4_K_M,0.66,0.41 +llama-2-chat,70,ggufv2,Q5_K_M,0.57,0.37 +llama-2-chat,7,ggufv2,Q3_K_M,0.52,0.34 +llama-2-chat,7,ggufv2,Q5_0,0.49,0.32 +llama-2-chat,7,ggufv2,Q5_K_M,0.47,0.38 +llama-2-chat,13,ggufv2,Q5_0,0.46,0.53 +llama-2-chat,7,ggufv2,Q4_0,0.45,0.39 +llama-2-chat,7,ggufv2,Q4_K_S,0.44,0.37 +llama-2-chat,13,ggufv2,Q4_1,0.43,0.48 +llama-2-chat,7,ggufv2,Q4_1,0.42,0.35 +llama-2-chat,13,ggufv2,Q4_0,0.41,0.4 +llama-2-chat,7,ggufv2,Q4_K_M,0.41,0.35 +llama-2-chat,7,ggufv2,Q8_0,0.4,0.34 +llama-2-chat,13,ggufv2,Q5_K_M,0.4,0.43 +llama-2-chat,70,ggufv2,Q2_K,0.38,0.42 +llama-2-chat,13,ggufv2,Q4_K_S,0.38,0.39 +llama-2-chat,13,ggufv2,Q8_0,0.38,0.4 +llama-2-chat,13,ggufv2,Q4_K_M,0.38,0.4 +llama-2-chat,13,ggufv2,Q6_K,0.38,0.4 +llama-2-chat,7,ggufv2,Q6_K,0.36,0.34 mixtral-instruct-v0.1,"46,7",ggufv2,Q2_K,0.35,0.39 mixtral-instruct-v0.1,"46,7",ggufv2,Q4_0,0.35,0.38 mixtral-instruct-v0.1,"46,7",ggufv2,Q4_K_M,0.35,0.37 -llama-2-chat,13,ggufv2,q3_K_M,0.34,0.35 +llama-2-chat,70,ggufv2,Q3_K_M,0.35,0.37 +llama-2-chat,13,ggufv2,Q3_K_M,0.34,0.35 mixtral-instruct-v0.1,"46,7",ggufv2,Q5_0,0.33,0.28 -llama-2-chat,7,ggufv2,q2_K,0.33,0.3 -llama-2-chat,13,ggufv2,q2_K,0.3,0.4 +llama-2-chat,7,ggufv2,Q2_K,0.33,0.3 +mixtral-instruct-v0.1,"46,7",ggufv2,Q6_K,0.3,0.4 +llama-2-chat,13,ggufv2,Q2_K,0.3,0.4 +mixtral-instruct-v0.1,"46,7",ggufv2,Q3_K_M,0.28,0.3 mixtral-instruct-v0.1,"46,7",ggufv2,Q8_0,0.28,0.34 diff --git a/benchmark/results/preprocessed_for_frontend/overview.csv b/benchmark/results/preprocessed_for_frontend/overview.csv index cc56c684..bf526988 100644 --- a/benchmark/results/preprocessed_for_frontend/overview.csv +++ b/benchmark/results/preprocessed_for_frontend/overview.csv @@ -1,6 +1,8 @@ Model name,property_selection,query_generation,explicit_relevance_of_single_fragments,entity_selection,relationship_selection,end_to_end_query_generation,implicit_relevance_of_multiple_fragments,property_exists,Mean,SD gpt-4,0.7647058823529411,0.75,1.0,1.0,1.0,0.71875,1.0,1.0,0.9041819852941176,0.12425585940001706 gpt-3.5-turbo,0.6470588235294118,0.6875,1.0,1.0,1.0,0.6875,1.0,1.0,0.8777573529411764,0.1582457961403944 +llama-2-chat:70:ggufv2:Q4_K_M,0.0,0.5,1.0,0.75,1.0,0.0,1.0,1.0,0.65625,0.41339864235384227 +llama-2-chat:70:ggufv2:Q5_K_M,0.0,0.5,1.0,0.8,0.5,0.0,0.75,1.0,0.56875,0.3732772930409778 llama-2-chat:7:ggufv2:Q3_K_M,0.0,0.65625,1.0,0.6,0.5,0.0,0.5,0.875,0.51640625,0.3396479320906245 llama-2-chat:7:ggufv2:Q5_0,0.0,0.625,1.0,0.6,0.5,0.0,0.5,0.6666666666666666,0.4864583333333333,0.3166101237896715 llama-2-chat:7:ggufv2:Q5_K_M,0.0,0.625,1.0,0.6,0.0,0.0,0.75,0.75,0.465625,0.37768569522156914 @@ -13,16 +15,20 @@ llama-2-chat:13:ggufv2:Q4_0,0.0,0.5625,1.0,0.2,0.0,0.0,0.5,1.0,0.4078125,0.39936 llama-2-chat:7:ggufv2:Q4_K_M,0.0,0.59375,1.0,0.5,0.0,0.0,0.5,0.6666666666666666,0.4075520833333333,0.347885380173866 llama-2-chat:7:ggufv2:Q8_0,0.0,0.625,1.0,0.6,0.0,0.0,0.5,0.5,0.403125,0.3447003907381017 llama-2-chat:13:ggufv2:Q5_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,1.0,0.3984375,0.4261215669779576 +llama-2-chat:70:ggufv2:Q2_K,0.0,0.5625,1.0,0.0,0.0,0.0,0.5,1.0,0.3828125,0.41743906273101705 llama-2-chat:13:ggufv2:Q4_K_S,0.0,0.625,1.0,0.0,0.0,0.0,0.75,0.6666666666666666,0.3802083333333333,0.3938753658448881 llama-2-chat:13:ggufv2:Q8_0,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 -llama-2-chat:13:ggufv2:Q6_K,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 llama-2-chat:13:ggufv2:Q4_K_M,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 +llama-2-chat:13:ggufv2:Q6_K,0.0,0.6875,1.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.3776041666666667,0.3994404587939581 llama-2-chat:7:ggufv2:Q6_K,0.0,0.65625,1.0,0.2,0.0,0.0,0.5,0.5,0.35703125,0.3447632438876533 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,0.71875,0.3333333333333333,0.0,0.0,0.0,0.75,1.0,0.35026041666666663,0.3887084057720928 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,0.75,0.4166666666666667,0.0,0.0,0.0,1.0,0.625,0.34895833333333337,0.379565666895876 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,0.75,0.1666666666666666,0.2,0.0,0.0,1.0,0.6666666666666666,0.34791666666666665,0.3718793767249447 +llama-2-chat:70:ggufv2:Q3_K_M,0.0,0.5625,1.0,0.0,0.0,0.0,0.5,0.7,0.3453125,0.37122777185139316 llama-2-chat:13:ggufv2:Q3_K_M,0.0,0.6875,0.8333333333333334,0.0,0.0,0.0,0.5,0.6666666666666666,0.3359375,0.34617790000931764 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,0.75,0.0833333333333333,0.4,0.25,0.0,0.5,0.6666666666666666,0.33125,0.2758116179770372 llama-2-chat:7:ggufv2:Q2_K,0.2352941176470588,0.4375,0.8333333333333334,0.6,0.0,0.0,0.5,0.0,0.325765931372549,0.29627404541647706 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,0.0,0.75,0.0,0.0,0.0,0.0,1.0,0.6666666666666666,0.3020833333333333,0.3995168523076064 llama-2-chat:13:ggufv2:Q2_K,0.0,0.5625,0.8333333333333334,0.0,0.0,0.0,1.0,0.0,0.2994791666666667,0.4020802973762759 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,0.0,0.75,0.0,0.4,0.0,0.0,0.5,0.625,0.284375,0.2992014110511513 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,0.75,0.0833333333333333,0.0,0.0,0.0,0.75,0.6666666666666666,0.28125,0.3434341983715528 diff --git a/benchmark/results/preprocessed_for_frontend/property_exists.csv b/benchmark/results/preprocessed_for_frontend/property_exists.csv index 27db2be5..72586205 100644 --- a/benchmark/results/preprocessed_for_frontend/property_exists.csv +++ b/benchmark/results/preprocessed_for_frontend/property_exists.csv @@ -2,24 +2,30 @@ Model name,Passed test cases,Total test cases,Score,Iterations llama-2-chat:13:ggufv2:Q5_0,1.5,1.0,1.5,2 llama-2-chat:13:ggufv2:Q4_1,2.5,2.0,1.25,2 gpt-3.5-turbo,4.0,4.0,1.0,2 +llama-2-chat:70:ggufv2:Q2_K,4.0,4.0,1.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,2.0,2.0,1.0,2 llama-2-chat:7:ggufv2:Q4_0,4.0,4.0,1.0,2 llama-2-chat:13:ggufv2:Q4_0,1.0,1.0,1.0,2 gpt-4,4.0,4.0,1.0,2 llama-2-chat:13:ggufv2:Q5_K_M,3.0,3.0,1.0,2 +llama-2-chat:70:ggufv2:Q5_K_M,4.0,4.0,1.0,2 +llama-2-chat:70:ggufv2:Q4_K_M,4.0,4.0,1.0,2 llama-2-chat:7:ggufv2:Q3_K_M,3.5,4.0,0.875,2 -llama-2-chat:7:ggufv2:Q4_K_S,2.5,3.0,0.8333333333333334,2 llama-2-chat:13:ggufv2:Q8_0,2.5,3.0,0.8333333333333334,2 -llama-2-chat:13:ggufv2:Q4_K_M,2.5,3.0,0.8333333333333334,2 llama-2-chat:13:ggufv2:Q6_K,2.5,3.0,0.8333333333333334,2 +llama-2-chat:13:ggufv2:Q4_K_M,2.5,3.0,0.8333333333333334,2 +llama-2-chat:7:ggufv2:Q4_K_S,2.5,3.0,0.8333333333333334,2 llama-2-chat:7:ggufv2:Q5_K_M,3.0,4.0,0.75,2 -llama-2-chat:13:ggufv2:Q4_K_S,2.0,3.0,0.6666666666666666,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,3.0,0.6666666666666666,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,3.0,0.6666666666666666,2 +llama-2-chat:70:ggufv2:Q3_K_M,3.5,5.0,0.7,2 llama-2-chat:7:ggufv2:Q5_0,2.0,3.0,0.6666666666666666,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,2.0,3.0,0.6666666666666666,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.0,3.0,0.6666666666666666,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,2.0,3.0,0.6666666666666666,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.0,3.0,0.6666666666666666,2 llama-2-chat:7:ggufv2:Q4_K_M,2.0,3.0,0.6666666666666666,2 +llama-2-chat:13:ggufv2:Q4_K_S,2.0,3.0,0.6666666666666666,2 llama-2-chat:13:ggufv2:Q3_K_M,2.0,3.0,0.6666666666666666,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.0,3.0,0.6666666666666666,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,2.5,4.0,0.625,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,2.5,4.0,0.625,2 llama-2-chat:7:ggufv2:Q4_1,3.0,5.0,0.6,2 llama-2-chat:7:ggufv2:Q6_K,2.0,4.0,0.5,2 diff --git a/benchmark/results/preprocessed_for_frontend/property_selection.csv b/benchmark/results/preprocessed_for_frontend/property_selection.csv index bf109ef4..541f5cc0 100644 --- a/benchmark/results/preprocessed_for_frontend/property_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/property_selection.csv @@ -2,10 +2,13 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-4,13.0,17.0,0.7647058823529411,2 gpt-3.5-turbo,11.0,17.0,0.6470588235294118,2 llama-2-chat:7:ggufv2:Q2_K,4.0,17.0,0.23529411764705882,2 -llama-2-chat:7:ggufv2:Q4_1,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_0,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,0.0,10.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2 llama-2-chat:7:ggufv2:Q8_0,0.0,6.0,0.0,2 llama-2-chat:7:ggufv2:Q6_K,0.0,6.0,0.0,2 @@ -13,8 +16,14 @@ llama-2-chat:7:ggufv2:Q5_K_M,0.0,6.0,0.0,2 llama-2-chat:7:ggufv2:Q5_0,0.0,6.0,0.0,2 llama-2-chat:7:ggufv2:Q4_K_S,0.0,6.0,0.0,2 llama-2-chat:7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 -llama-2-chat:7:ggufv2:Q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_1,0.0,6.0,0.0,2 llama-2-chat:7:ggufv2:Q4_0,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q2_K,0.0,6.0,0.0,2 +llama-2-chat:70:ggufv2:Q5_K_M,0.0,6.0,0.0,2 +llama-2-chat:70:ggufv2:Q4_K_M,0.0,17.0,0.0,2 +llama-2-chat:70:ggufv2:Q3_K_M,0.0,10.0,0.0,2 +llama-2-chat:70:ggufv2:Q2_K,0.0,10.0,0.0,2 llama-2-chat:13:ggufv2:Q8_0,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q6_K,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q5_K_M,0.0,6.0,0.0,2 @@ -22,7 +31,4 @@ llama-2-chat:13:ggufv2:Q5_0,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q4_K_S,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q4_K_M,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q4_1,0.0,6.0,0.0,2 -llama-2-chat:13:ggufv2:Q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggufv2:Q3_K_M,0.0,6.0,0.0,2 -llama-2-chat:13:ggufv2:Q2_K,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/query_generation.csv b/benchmark/results/preprocessed_for_frontend/query_generation.csv index 3600570e..60fbfe9a 100644 --- a/benchmark/results/preprocessed_for_frontend/query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/query_generation.csv @@ -1,28 +1,34 @@ Model name,Passed test cases,Total test cases,Score,Iterations mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,12.0,16.0,0.75,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,12.0,16.0,0.75,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,12.0,16.0,0.75,2 gpt-4,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,11.5,16.0,0.71875,2 -llama-2-chat:13:ggufv2:Q5_K_M,11.0,16.0,0.6875,2 -llama-2-chat:13:ggufv2:Q8_0,11.0,16.0,0.6875,2 llama-2-chat:13:ggufv2:Q6_K,11.0,16.0,0.6875,2 +llama-2-chat:13:ggufv2:Q8_0,11.0,16.0,0.6875,2 gpt-3.5-turbo,11.0,16.0,0.6875,2 -llama-2-chat:13:ggufv2:Q5_0,11.0,16.0,0.6875,2 +llama-2-chat:13:ggufv2:Q5_K_M,11.0,16.0,0.6875,2 llama-2-chat:13:ggufv2:Q3_K_M,11.0,16.0,0.6875,2 -llama-2-chat:13:ggufv2:Q4_K_M,11.0,16.0,0.6875,2 llama-2-chat:13:ggufv2:Q4_1,11.0,16.0,0.6875,2 -llama-2-chat:7:ggufv2:Q6_K,10.5,16.0,0.65625,2 +llama-2-chat:13:ggufv2:Q4_K_M,11.0,16.0,0.6875,2 +llama-2-chat:13:ggufv2:Q5_0,11.0,16.0,0.6875,2 llama-2-chat:7:ggufv2:Q3_K_M,10.5,16.0,0.65625,2 -llama-2-chat:7:ggufv2:Q4_K_S,10.0,16.0,0.625,2 -llama-2-chat:13:ggufv2:Q4_K_S,10.0,16.0,0.625,2 +llama-2-chat:7:ggufv2:Q6_K,10.5,16.0,0.65625,2 llama-2-chat:7:ggufv2:Q5_K_M,10.0,16.0,0.625,2 -llama-2-chat:7:ggufv2:Q4_1,10.0,16.0,0.625,2 +llama-2-chat:13:ggufv2:Q4_K_S,10.0,16.0,0.625,2 llama-2-chat:7:ggufv2:Q8_0,10.0,16.0,0.625,2 llama-2-chat:7:ggufv2:Q5_0,10.0,16.0,0.625,2 +llama-2-chat:7:ggufv2:Q4_K_S,10.0,16.0,0.625,2 +llama-2-chat:7:ggufv2:Q4_1,10.0,16.0,0.625,2 llama-2-chat:7:ggufv2:Q4_K_M,9.5,16.0,0.59375,2 +llama-2-chat:70:ggufv2:Q2_K,9.0,16.0,0.5625,2 llama-2-chat:13:ggufv2:Q4_0,9.0,16.0,0.5625,2 +llama-2-chat:70:ggufv2:Q3_K_M,9.0,16.0,0.5625,2 llama-2-chat:13:ggufv2:Q2_K,9.0,16.0,0.5625,2 llama-2-chat:7:ggufv2:Q4_0,8.0,16.0,0.5,2 +llama-2-chat:70:ggufv2:Q5_K_M,8.0,16.0,0.5,2 +llama-2-chat:70:ggufv2:Q4_K_M,8.0,16.0,0.5,2 llama-2-chat:7:ggufv2:Q2_K,7.0,16.0,0.4375,2 diff --git a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv index 1f953cb7..b7265b1c 100644 --- a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv @@ -1,28 +1,34 @@ Model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,6.0,6.0,1.0,2 gpt-4,6.0,6.0,1.0,2 +llama-2-chat:70:ggufv2:Q4_K_M,6.0,6.0,1.0,2 llama-2-chat:7:ggufv2:Q5_0,3.0,6.0,0.5,2 +llama-2-chat:70:ggufv2:Q5_K_M,3.0,6.0,0.5,2 llama-2-chat:7:ggufv2:Q3_K_M,3.0,6.0,0.5,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.5,6.0,0.25,2 -llama-2-chat:13:ggufv2:Q4_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 -mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2 llama-2-chat:7:ggufv2:Q8_0,0.0,6.0,0.0,2 -llama-2-chat:7:ggufv2:Q6_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_1,0.0,6.0,0.0,2 llama-2-chat:7:ggufv2:Q5_K_M,0.0,6.0,0.0,2 -llama-2-chat:13:ggufv2:Q2_K,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2 llama-2-chat:7:ggufv2:Q4_K_S,0.0,6.0,0.0,2 -llama-2-chat:7:ggufv2:Q4_1,0.0,6.0,0.0,2 -llama-2-chat:13:ggufv2:Q4_1,0.0,6.0,0.0,2 -llama-2-chat:7:ggufv2:Q4_0,0.0,6.0,0.0,2 -llama-2-chat:13:ggufv2:Q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,0.0,6.0,0.0,2 +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q6_K,0.0,6.0,0.0,2 llama-2-chat:7:ggufv2:Q2_K,0.0,6.0,0.0,2 +llama-2-chat:7:ggufv2:Q4_0,0.0,6.0,0.0,2 +llama-2-chat:70:ggufv2:Q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:70:ggufv2:Q2_K,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q8_0,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q6_K,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q5_K_M,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q5_0,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q4_K_S,0.0,6.0,0.0,2 llama-2-chat:13:ggufv2:Q4_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_1,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q4_0,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q3_K_M,0.0,6.0,0.0,2 +llama-2-chat:13:ggufv2:Q2_K,0.0,6.0,0.0,2 mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2 diff --git a/benchmark/results/property_exists.csv b/benchmark/results/property_exists.csv index 802e8598..8dddaa9b 100644 --- a/benchmark/results/property_exists.csv +++ b/benchmark/results/property_exists.csv @@ -23,6 +23,14 @@ llama-2-chat:13:ggufv2:Q6_K,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q6_K,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggufv2:Q8_0,multi_word,1.5/2,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q8_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q2_K,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q2_K,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q3_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q3_K_M,single_word,1.5/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q4_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q4_K_M,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q5_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q5_K_M,single_word,2.0/2,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q2_K,multi_word,1.0/0,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q2_K,single_word,0.0/0,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q3_K_M,multi_word,2.0/2,2,f29b6faf7d003159d43a5d1cf451587f @@ -45,11 +53,15 @@ llama-2-chat:7:ggufv2:Q8_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q8_0,single_word,1.5/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,1.0/1,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,single_word,1.5/2,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,1.5/2,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,1.0/2,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,1.0/1,2,e447d738f5e035cde60d624eadb79fec diff --git a/benchmark/results/property_selection.csv b/benchmark/results/property_selection.csv index e0da4238..584f0ed9 100644 --- a/benchmark/results/property_selection.csv +++ b/benchmark/results/property_selection.csv @@ -23,6 +23,14 @@ llama-2-chat:13:ggufv2:Q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q2_K,multi_word,0.0/7,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q3_K_M,multi_word,0.0/7,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q4_K_M,multi_word,0.0/7,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q4_K_M,single_word,0.0/10,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q5_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q2_K,multi_word,2.0/7,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q2_K,single_word,2.0/10,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f @@ -45,11 +53,15 @@ llama-2-chat:7:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,multi_word,0.0/7,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec diff --git a/benchmark/results/query_generation.csv b/benchmark/results/query_generation.csv index aec1c610..d4daf41e 100644 --- a/benchmark/results/query_generation.csv +++ b/benchmark/results/query_generation.csv @@ -23,6 +23,14 @@ llama-2-chat:13:ggufv2:Q6_K,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q6_K,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggufv2:Q8_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q8_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q2_K,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q2_K,single_word,4.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q3_K_M,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q3_K_M,single_word,4.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q4_K_M,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q4_K_M,single_word,3.0/8,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q5_K_M,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q5_K_M,single_word,3.0/8,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q2_K,multi_word,4.0/8,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q2_K,single_word,3.0/8,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q3_K_M,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f @@ -45,11 +53,15 @@ llama-2-chat:7:ggufv2:Q8_0,multi_word,5.0/8,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q8_0,single_word,5.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,5.5/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,6.0/8,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,6.0/8,2,e447d738f5e035cde60d624eadb79fec diff --git a/benchmark/results/relationship_selection.csv b/benchmark/results/relationship_selection.csv index 7704e2ef..52209bcf 100644 --- a/benchmark/results/relationship_selection.csv +++ b/benchmark/results/relationship_selection.csv @@ -23,6 +23,14 @@ llama-2-chat:13:ggufv2:Q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:13:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:13:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q4_K_M,multi_word,3.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q4_K_M,single_word,3.0/3,2,e447d738f5e035cde60d624eadb79fec +llama-2-chat:70:ggufv2:Q5_K_M,multi_word,3.0/3,2,f29b6faf7d003159d43a5d1cf451587f +llama-2-chat:70:ggufv2:Q5_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec llama-2-chat:7:ggufv2:Q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f @@ -45,11 +53,15 @@ llama-2-chat:7:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f llama-2-chat:7:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q3_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q4_K_M,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,multi_word,1.5/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f +mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,multi_word,0.0/3,2,f29b6faf7d003159d43a5d1cf451587f mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,single_word,0.0/3,2,e447d738f5e035cde60d624eadb79fec From 6869a74946d8483ae570eceb344ce5d012cf3966 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Mon, 5 Feb 2024 20:01:28 +0100 Subject: [PATCH 31/32] add table aggregated for entire models --- .../end_to_end_query_generation.csv | 2 +- .../entity_selection.csv | 2 +- ...explicit_relevance_of_single_fragments.csv | 2 +- ...plicit_relevance_of_multiple_fragments.csv | 2 +- .../overview-model.csv | 7 ++ .../overview-quantisation.csv | 34 +++++++++ .../overview-size.csv | 7 ++ .../preprocessed_for_frontend/overview.csv | 2 +- .../property_exists.csv | 2 +- .../property_selection.csv | 2 +- .../query_generation.csv | 2 +- .../relationship_selection.csv | 2 +- docs/benchmark-overview.md | 13 +++- docs/scripts/hooks.py | 69 ++++++++++++++----- 14 files changed, 120 insertions(+), 28 deletions(-) create mode 100644 benchmark/results/preprocessed_for_frontend/overview-model.csv create mode 100644 benchmark/results/preprocessed_for_frontend/overview-quantisation.csv create mode 100644 benchmark/results/preprocessed_for_frontend/overview-size.csv diff --git a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv index 9efe2c49..ec55bfa8 100644 --- a/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/end_to_end_query_generation.csv @@ -1,4 +1,4 @@ -Model name,Passed test cases,Total test cases,Score,Iterations +Full model name,Passed test cases,Total test cases,Score,Iterations gpt-4,11.5,16.0,0.71875,2 gpt-3.5-turbo,11.0,16.0,0.6875,2 mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,0.0,16.0,0.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/entity_selection.csv b/benchmark/results/preprocessed_for_frontend/entity_selection.csv index 16f2d979..599eb27a 100644 --- a/benchmark/results/preprocessed_for_frontend/entity_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/entity_selection.csv @@ -1,4 +1,4 @@ -Model name,Passed test cases,Total test cases,Score,Iterations +Full model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,4.0,4.0,1.0,2 gpt-4,4.0,4.0,1.0,2 llama-2-chat:70:ggufv2:Q5_K_M,4.0,5.0,0.8,2 diff --git a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv index 9f44ca4a..5590bfa7 100644 --- a/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/explicit_relevance_of_single_fragments.csv @@ -1,4 +1,4 @@ -Model name,Passed test cases,Total test cases,Score,Iterations +Full model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,6.0,6.0,1.0,2 llama-2-chat:70:ggufv2:Q2_K,6.0,6.0,1.0,2 llama-2-chat:7:ggufv2:Q4_K_S,6.0,6.0,1.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv index a25c00c6..b95ec5b8 100644 --- a/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv +++ b/benchmark/results/preprocessed_for_frontend/implicit_relevance_of_multiple_fragments.csv @@ -1,4 +1,4 @@ -Model name,Passed test cases,Total test cases,Score,Iterations +Full model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,2.0,2.0,1.0,2 llama-2-chat:70:ggufv2:Q4_K_M,2.0,2.0,1.0,2 llama-2-chat:13:ggufv2:Q2_K,2.0,2.0,1.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/overview-model.csv b/benchmark/results/preprocessed_for_frontend/overview-model.csv new file mode 100644 index 00000000..05b5ffa8 --- /dev/null +++ b/benchmark/results/preprocessed_for_frontend/overview-model.csv @@ -0,0 +1,7 @@ +Model name,Size,Mean,SD +gpt-4,Unknown,0.9,0.12 +gpt-3.5-turbo,175,0.88,0.16 +llama-2-chat,70,0.49,0.39 +llama-2-chat,7,0.43,0.35 +llama-2-chat,13,0.39,0.42 +mixtral-instruct-v0.1,"46,7",0.32,0.35 diff --git a/benchmark/results/preprocessed_for_frontend/overview-quantisation.csv b/benchmark/results/preprocessed_for_frontend/overview-quantisation.csv new file mode 100644 index 00000000..d1619534 --- /dev/null +++ b/benchmark/results/preprocessed_for_frontend/overview-quantisation.csv @@ -0,0 +1,34 @@ +Model name,Size,Version,Quantisation,Mean,SD +gpt-4,Unknown,,,0.9,0.12 +gpt-3.5-turbo,175,,,0.88,0.16 +llama-2-chat,70,ggufv2,Q4_K_M,0.66,0.41 +llama-2-chat,70,ggufv2,Q5_K_M,0.57,0.37 +llama-2-chat,7,ggufv2,Q3_K_M,0.52,0.34 +llama-2-chat,7,ggufv2,Q5_0,0.49,0.32 +llama-2-chat,7,ggufv2,Q5_K_M,0.47,0.38 +llama-2-chat,13,ggufv2,Q5_0,0.46,0.53 +llama-2-chat,7,ggufv2,Q4_0,0.45,0.39 +llama-2-chat,7,ggufv2,Q4_K_S,0.44,0.37 +llama-2-chat,13,ggufv2,Q4_1,0.43,0.48 +llama-2-chat,7,ggufv2,Q4_1,0.42,0.35 +llama-2-chat,13,ggufv2,Q4_0,0.41,0.4 +llama-2-chat,7,ggufv2,Q4_K_M,0.41,0.35 +llama-2-chat,7,ggufv2,Q8_0,0.4,0.34 +llama-2-chat,13,ggufv2,Q5_K_M,0.4,0.43 +llama-2-chat,70,ggufv2,Q2_K,0.38,0.42 +llama-2-chat,13,ggufv2,Q4_K_S,0.38,0.39 +llama-2-chat,13,ggufv2,Q8_0,0.38,0.4 +llama-2-chat,13,ggufv2,Q4_K_M,0.38,0.4 +llama-2-chat,13,ggufv2,Q6_K,0.38,0.4 +llama-2-chat,7,ggufv2,Q6_K,0.36,0.34 +mixtral-instruct-v0.1,"46,7",ggufv2,Q2_K,0.35,0.39 +mixtral-instruct-v0.1,"46,7",ggufv2,Q4_0,0.35,0.38 +mixtral-instruct-v0.1,"46,7",ggufv2,Q4_K_M,0.35,0.37 +llama-2-chat,70,ggufv2,Q3_K_M,0.35,0.37 +llama-2-chat,13,ggufv2,Q3_K_M,0.34,0.35 +mixtral-instruct-v0.1,"46,7",ggufv2,Q5_0,0.33,0.28 +llama-2-chat,7,ggufv2,Q2_K,0.33,0.3 +mixtral-instruct-v0.1,"46,7",ggufv2,Q6_K,0.3,0.4 +llama-2-chat,13,ggufv2,Q2_K,0.3,0.4 +mixtral-instruct-v0.1,"46,7",ggufv2,Q3_K_M,0.28,0.3 +mixtral-instruct-v0.1,"46,7",ggufv2,Q8_0,0.28,0.34 diff --git a/benchmark/results/preprocessed_for_frontend/overview-size.csv b/benchmark/results/preprocessed_for_frontend/overview-size.csv new file mode 100644 index 00000000..05b5ffa8 --- /dev/null +++ b/benchmark/results/preprocessed_for_frontend/overview-size.csv @@ -0,0 +1,7 @@ +Model name,Size,Mean,SD +gpt-4,Unknown,0.9,0.12 +gpt-3.5-turbo,175,0.88,0.16 +llama-2-chat,70,0.49,0.39 +llama-2-chat,7,0.43,0.35 +llama-2-chat,13,0.39,0.42 +mixtral-instruct-v0.1,"46,7",0.32,0.35 diff --git a/benchmark/results/preprocessed_for_frontend/overview.csv b/benchmark/results/preprocessed_for_frontend/overview.csv index bf526988..d7b89d65 100644 --- a/benchmark/results/preprocessed_for_frontend/overview.csv +++ b/benchmark/results/preprocessed_for_frontend/overview.csv @@ -1,4 +1,4 @@ -Model name,property_selection,query_generation,explicit_relevance_of_single_fragments,entity_selection,relationship_selection,end_to_end_query_generation,implicit_relevance_of_multiple_fragments,property_exists,Mean,SD +Full model name,property_selection,query_generation,explicit_relevance_of_single_fragments,entity_selection,relationship_selection,end_to_end_query_generation,implicit_relevance_of_multiple_fragments,property_exists,Mean,SD gpt-4,0.7647058823529411,0.75,1.0,1.0,1.0,0.71875,1.0,1.0,0.9041819852941176,0.12425585940001706 gpt-3.5-turbo,0.6470588235294118,0.6875,1.0,1.0,1.0,0.6875,1.0,1.0,0.8777573529411764,0.1582457961403944 llama-2-chat:70:ggufv2:Q4_K_M,0.0,0.5,1.0,0.75,1.0,0.0,1.0,1.0,0.65625,0.41339864235384227 diff --git a/benchmark/results/preprocessed_for_frontend/property_exists.csv b/benchmark/results/preprocessed_for_frontend/property_exists.csv index 72586205..7ffbca4b 100644 --- a/benchmark/results/preprocessed_for_frontend/property_exists.csv +++ b/benchmark/results/preprocessed_for_frontend/property_exists.csv @@ -1,4 +1,4 @@ -Model name,Passed test cases,Total test cases,Score,Iterations +Full model name,Passed test cases,Total test cases,Score,Iterations llama-2-chat:13:ggufv2:Q5_0,1.5,1.0,1.5,2 llama-2-chat:13:ggufv2:Q4_1,2.5,2.0,1.25,2 gpt-3.5-turbo,4.0,4.0,1.0,2 diff --git a/benchmark/results/preprocessed_for_frontend/property_selection.csv b/benchmark/results/preprocessed_for_frontend/property_selection.csv index 541f5cc0..c45e02c7 100644 --- a/benchmark/results/preprocessed_for_frontend/property_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/property_selection.csv @@ -1,4 +1,4 @@ -Model name,Passed test cases,Total test cases,Score,Iterations +Full model name,Passed test cases,Total test cases,Score,Iterations gpt-4,13.0,17.0,0.7647058823529411,2 gpt-3.5-turbo,11.0,17.0,0.6470588235294118,2 llama-2-chat:7:ggufv2:Q2_K,4.0,17.0,0.23529411764705882,2 diff --git a/benchmark/results/preprocessed_for_frontend/query_generation.csv b/benchmark/results/preprocessed_for_frontend/query_generation.csv index 60fbfe9a..c911cbf9 100644 --- a/benchmark/results/preprocessed_for_frontend/query_generation.csv +++ b/benchmark/results/preprocessed_for_frontend/query_generation.csv @@ -1,4 +1,4 @@ -Model name,Passed test cases,Total test cases,Score,Iterations +Full model name,Passed test cases,Total test cases,Score,Iterations mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q6_K,12.0,16.0,0.75,2 mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,12.0,16.0,0.75,2 diff --git a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv index b7265b1c..d2956831 100644 --- a/benchmark/results/preprocessed_for_frontend/relationship_selection.csv +++ b/benchmark/results/preprocessed_for_frontend/relationship_selection.csv @@ -1,4 +1,4 @@ -Model name,Passed test cases,Total test cases,Score,Iterations +Full model name,Passed test cases,Total test cases,Score,Iterations gpt-3.5-turbo,6.0,6.0,1.0,2 gpt-4,6.0,6.0,1.0,2 llama-2-chat:70:ggufv2:Q4_K_M,6.0,6.0,1.0,2 diff --git a/docs/benchmark-overview.md b/docs/benchmark-overview.md index 07471974..26b5f429 100644 --- a/docs/benchmark-overview.md +++ b/docs/benchmark-overview.md @@ -1,13 +1,20 @@ # Benchmark Results - Overview -## Aggregated scores +## Scores per model Table sorted by mean score in descending order. Click the column names to reorder. -{{ read_csv('benchmark/results/preprocessed_for_frontend/overview-aggregated.csv', colalign=("center","center","center","center","center")) }} +{{ read_csv('benchmark/results/preprocessed_for_frontend/overview-model.csv', colalign=("center","center","center")) }} -## Including all tasks +## Scores per quantisation + +Table sorted by mean score in descending order. +Click the column names to reorder. + +{{ read_csv('benchmark/results/preprocessed_for_frontend/overview-quantisation.csv', colalign=("center","center","center","center","center")) }} + +## Scores of all tasks Wide table; you may need to scroll horizontally to see all columns. Table sorted by mean score in descending order. diff --git a/docs/scripts/hooks.py b/docs/scripts/hooks.py index 8e9b6a4b..7eb07b8f 100644 --- a/docs/scripts/hooks.py +++ b/docs/scripts/hooks.py @@ -75,8 +75,8 @@ def preprocess_results_for_frontend( axis=1, ) - aggregated_scores["Model name"] = aggregated_scores.index.get_level_values( - "model_name" + aggregated_scores["Full model name"] = ( + aggregated_scores.index.get_level_values("model_name") ) aggregated_scores["Passed test cases"] = aggregated_scores[ "passed_test_cases" @@ -86,7 +86,7 @@ def preprocess_results_for_frontend( ] aggregated_scores["Iterations"] = aggregated_scores["iterations"] new_order = [ - "Model name", + "Full model name", "Passed test cases", "Total test cases", "Score", @@ -117,29 +117,42 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]): ) file_name_without_extension = os.path.splitext(file)[0] subtask_result[file_name_without_extension] = subtask_result["Score"] - subtask_result.set_index("Model name", inplace=True) + subtask_result.set_index("Full model name", inplace=True) subtask_result = subtask_result[file_name_without_extension] subtask_results.append(subtask_result) overview = pd.concat(subtask_results, axis=1) overview["Mean"] = overview.mean(axis=1) overview["SD"] = overview.std(axis=1) overview = overview.sort_values(by="Mean", ascending=False) - # split "Model name" at : to get Model name, size, version, and quantisation + # split "Full model name" at : to get Model name, size, version, and quantisation overview.to_csv( f"{result_files_path}preprocessed_for_frontend/overview.csv", index=True, ) - overview_aggregated = overview - overview_aggregated["Model name"] = overview_aggregated.index - overview_aggregated[["Model name", "Size", "Version", "Quantisation"]] = ( - overview_aggregated["Model name"].str.split(":", expand=True) + overview_per_quantisation = overview + overview_per_quantisation["Full model name"] = ( + overview_per_quantisation.index ) + overview_per_quantisation[ + ["Model name", "Size", "Version", "Quantisation"] + ] = overview_per_quantisation["Full model name"].str.split(":", expand=True) # convert underscores in Size to commas - overview_aggregated["Size"] = overview_aggregated["Size"].str.replace( - "_", "," + overview_per_quantisation["Size"] = overview_per_quantisation[ + "Size" + ].str.replace("_", ",") + # add size 175 for gpt-3.5-turbo and Unknown for gpt-4 + overview_per_quantisation["Size"] = overview_per_quantisation.apply( + lambda row: ( + "175" if row["Model name"] == "gpt-3.5-turbo" else row["Size"] + ), + axis=1, ) - overview_aggregated = overview_aggregated[ + overview_per_quantisation["Size"] = overview_per_quantisation.apply( + lambda row: "Unknown" if row["Model name"] == "gpt-4" else row["Size"], + axis=1, + ) + overview_per_quantisation = overview_per_quantisation[ [ "Model name", "Size", @@ -150,13 +163,37 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]): ] ] # round mean and sd to 2 decimal places - overview_aggregated["Mean"] = overview_aggregated["Mean"].round(2) - overview_aggregated["SD"] = overview_aggregated["SD"].round(2) - overview_aggregated.to_csv( - f"{result_files_path}preprocessed_for_frontend/overview-aggregated.csv", + overview_per_quantisation["Mean"] = overview_per_quantisation["Mean"].round( + 2 + ) + overview_per_quantisation["SD"] = overview_per_quantisation["SD"].round(2) + overview_per_quantisation.to_csv( + f"{result_files_path}preprocessed_for_frontend/overview-quantisation.csv", index=False, ) + # group by model name and size, aggregate different quantisations + # keep models that do not have sizes + overview_per_size = overview_per_quantisation.groupby( + ["Model name", "Size"] + ).agg( + { + "Mean": "mean", + "SD": "mean", + } + ) + # round mean and SD to 2 decimal places + overview_per_size["Mean"] = overview_per_size["Mean"].round(2) + overview_per_size["SD"] = overview_per_size["SD"].round(2) + # sort by mean, descending + overview_per_size = overview_per_size.sort_values( + by="Mean", ascending=False + ) + overview_per_size.to_csv( + f"{result_files_path}preprocessed_for_frontend/overview-model.csv", + index=True, + ) + if __name__ == "__main__": on_pre_build(None) From 0b9a2a8e4b47cf9757bc96a05edade43dcc40e46 Mon Sep 17 00:00:00 2001 From: slobentanzer Date: Tue, 6 Feb 2024 01:04:26 +0100 Subject: [PATCH 32/32] remove redundant lists --- benchmark/conftest.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/benchmark/conftest.py b/benchmark/conftest.py index c182cce5..f564c046 100644 --- a/benchmark/conftest.py +++ b/benchmark/conftest.py @@ -262,32 +262,27 @@ def result_files(): if file.endswith(".csv") ] result_files = {} + result_columns = [ + "model_name", + "subtask", + "score", + "iterations", + "md5_hash", + ] for file in RESULT_FILES: try: result_file = pd.read_csv(file, header=0) except (pd.errors.EmptyDataError, FileNotFoundError): result_file = pd.DataFrame( - columns=[ - "model_name", - "subtask", - "score", - "iterations", - "md5_hash", - ] + columns=result_columns, ) result_file.to_csv(file, index=False) if not np.array_equal( result_file.columns, - ["model_name", "subtask", "score", "iterations", "md5_hash"], + result_columns, ): - result_file.columns = [ - "model_name", - "subtask", - "score", - "iterations", - "md5_hash", - ] + result_file.columns = result_columns result_files[file] = result_file