alternative visualisations

biocypher · Aug 22, 2024 · 9296287 · 9296287
1 parent 6bb6072
commit 9296287
Show file tree

Hide file tree

Showing 16 changed files with 69 additions and 18 deletions.
diff --git a/docs/images/boxplot-naive-vs-biochatter.pdf b/docs/images/boxplot-naive-vs-biochatter.pdf
diff --git a/docs/images/dotplot-per-task.pdf b/docs/images/dotplot-per-task.pdf
diff --git a/docs/images/dotplot-per-task.png b/docs/images/dotplot-per-task.png
diff --git a/docs/images/scatter-naive-vs-biochatter.pdf b/docs/images/scatter-naive-vs-biochatter.pdf
diff --git a/docs/images/scatter-naive-vs-biochatter.png b/docs/images/scatter-naive-vs-biochatter.png
diff --git a/docs/images/scatter-per-quantisation-name.pdf b/docs/images/scatter-per-quantisation-name.pdf
diff --git a/docs/images/scatter-per-quantisation-name.png b/docs/images/scatter-per-quantisation-name.png
diff --git a/docs/images/scatter-quantisation-accuracy.pdf b/docs/images/scatter-quantisation-accuracy.pdf
diff --git a/docs/images/scatter-size-accuracy.pdf b/docs/images/scatter-size-accuracy.pdf
diff --git a/docs/images/stripplot-extraction-tasks.png b/docs/images/stripplot-extraction-tasks.png
diff --git a/docs/images/stripplot-per-model.png b/docs/images/stripplot-per-model.png
diff --git a/docs/images/stripplot-rag-tasks.pdf b/docs/images/stripplot-rag-tasks.pdf
diff --git a/docs/images/stripplot-rag-tasks.png b/docs/images/stripplot-rag-tasks.png
diff --git a/docs/images/violin-naive-vs-biochatter.pdf b/docs/images/violin-naive-vs-biochatter.pdf
diff --git a/docs/images/violin-naive-vs-biochatter.png b/docs/images/violin-naive-vs-biochatter.png
diff --git a/docs/scripts/hooks.py b/docs/scripts/hooks.py
@@ -117,9 +117,7 @@ def plot_text2cypher():
             else (
                 "llama-3"
                 if "llama-3" in x
-                else "gpt"
-                if "gpt" in x
-                else "other open source"
+                else "gpt" if "gpt" in x else "other open source"
             )
         )
     )
@@ -261,9 +259,9 @@ def preprocess_results_for_frontend(
         axis=1,
     )
 
-    aggregated_scores[
-        "Full model name"
-    ] = aggregated_scores.index.get_level_values("model_name")
+    aggregated_scores["Full model name"] = (
+        aggregated_scores.index.get_level_values("model_name")
+    )
     aggregated_scores["Score achieved"] = aggregated_scores["score_achieved"]
     aggregated_scores["Score possible"] = aggregated_scores["score_possible"]
     aggregated_scores["Score SD"] = aggregated_scores["score_sd"]
@@ -333,9 +331,9 @@ def write_individual_extraction_task_results(raw_results: pd.DataFrame) -> None:
         axis=1,
     )
 
-    aggregated_scores[
-        "Full model name"
-    ] = aggregated_scores.index.get_level_values("model_name")
+    aggregated_scores["Full model name"] = (
+        aggregated_scores.index.get_level_values("model_name")
+    )
     aggregated_scores["Subtask"] = aggregated_scores.index.get_level_values(
         "subtask"
     )
@@ -392,9 +390,9 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]):
     )
 
     overview_per_quantisation = overview
-    overview_per_quantisation[
-        "Full model name"
-    ] = overview_per_quantisation.index
+    overview_per_quantisation["Full model name"] = (
+        overview_per_quantisation.index
+    )
     overview_per_quantisation[
         ["Model name", "Size", "Version", "Quantisation"]
     ] = overview_per_quantisation["Full model name"].str.split(":", expand=True)
@@ -428,9 +426,9 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]):
         ]
     ]
     # round mean and sd to 2 decimal places
-    overview_per_quantisation.loc[
-        :, "Median Accuracy"
-    ] = overview_per_quantisation["Median Accuracy"].round(2)
+    overview_per_quantisation.loc[:, "Median Accuracy"] = (
+        overview_per_quantisation["Median Accuracy"].round(2)
+    )
     overview_per_quantisation.loc[:, "SD"] = overview_per_quantisation[
         "SD"
     ].round(2)
@@ -878,9 +876,9 @@ def plot_extraction_tasks():
         axis=1,
     )
 
-    aggregated_scores[
-        "Full model name"
-    ] = aggregated_scores.index.get_level_values("model_name")
+    aggregated_scores["Full model name"] = (
+        aggregated_scores.index.get_level_values("model_name")
+    )
     aggregated_scores["Subtask"] = aggregated_scores.index.get_level_values(
         "subtask"
     )
@@ -1093,6 +1091,9 @@ def plot_comparison_naive_biochatter(overview):
         )
     ]
 
+    # print number of rows of each task
+    print(overview_melted["Task"].value_counts())
+
     sns.set_theme(style="whitegrid")
     plt.figure(figsize=(6, 4))
     sns.boxplot(
@@ -1118,6 +1119,56 @@ def plot_comparison_naive_biochatter(overview):
     )
     plt.close()
 
+    # plot strip plot (jittered scatter) of accuracy per task
+    plt.figure(figsize=(6, 4))
+    sns.stripplot(
+        x="Task",
+        y="Accuracy",
+        data=overview_melted,
+        jitter=0.2,
+        alpha=0.8,
+    )
+    plt.ylim(0, 1)
+    plt.xlabel(None)
+    plt.xticks(
+        ticks=range(len(overview_melted["Task"].unique())),
+        labels=["BioChatter", "Naive LLM (using full YAML schema)"],
+    )
+    plt.savefig(
+        "docs/images/scatter-naive-vs-biochatter.png",
+        bbox_inches="tight",
+        dpi=300,
+    )
+    plt.savefig(
+        "docs/images/scatter-naive-vs-biochatter.pdf",
+        bbox_inches="tight",
+    )
+    plt.close()
+
+    # plot violin plot
+    plt.figure(figsize=(6, 4))
+    sns.violinplot(
+        x="Task",
+        y="Accuracy",
+        data=overview_melted,
+    )
+    plt.ylim(0, 1)
+    plt.xlabel(None)
+    plt.xticks(
+        ticks=range(len(overview_melted["Task"].unique())),
+        labels=["BioChatter", "Naive LLM (using full YAML schema)"],
+    )
+    plt.savefig(
+        "docs/images/violin-naive-vs-biochatter.png",
+        bbox_inches="tight",
+        dpi=300,
+    )
+    plt.savefig(
+        "docs/images/violin-naive-vs-biochatter.pdf",
+        bbox_inches="tight",
+    )
+    plt.close()
+
 
 def calculate_stats(overview):
     overview_melted = melt_and_process(overview)