Skip to content

Commit

Permalink
alternative visualisations
Browse files Browse the repository at this point in the history
  • Loading branch information
slobentanzer committed Aug 22, 2024
1 parent 6bb6072 commit 9296287
Show file tree
Hide file tree
Showing 16 changed files with 69 additions and 18 deletions.
Binary file modified docs/images/boxplot-naive-vs-biochatter.pdf
Binary file not shown.
Binary file modified docs/images/dotplot-per-task.pdf
Binary file not shown.
Binary file modified docs/images/dotplot-per-task.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/scatter-naive-vs-biochatter.pdf
Binary file not shown.
Binary file added docs/images/scatter-naive-vs-biochatter.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/images/scatter-per-quantisation-name.pdf
Binary file not shown.
Binary file modified docs/images/scatter-per-quantisation-name.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/images/scatter-quantisation-accuracy.pdf
Binary file not shown.
Binary file modified docs/images/scatter-size-accuracy.pdf
Binary file not shown.
Binary file modified docs/images/stripplot-extraction-tasks.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/images/stripplot-per-model.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/images/stripplot-rag-tasks.pdf
Binary file not shown.
Binary file modified docs/images/stripplot-rag-tasks.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/violin-naive-vs-biochatter.pdf
Binary file not shown.
Binary file added docs/images/violin-naive-vs-biochatter.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
87 changes: 69 additions & 18 deletions docs/scripts/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,7 @@ def plot_text2cypher():
else (
"llama-3"
if "llama-3" in x
else "gpt"
if "gpt" in x
else "other open source"
else "gpt" if "gpt" in x else "other open source"
)
)
)
Expand Down Expand Up @@ -261,9 +259,9 @@ def preprocess_results_for_frontend(
axis=1,
)

aggregated_scores[
"Full model name"
] = aggregated_scores.index.get_level_values("model_name")
aggregated_scores["Full model name"] = (
aggregated_scores.index.get_level_values("model_name")
)
aggregated_scores["Score achieved"] = aggregated_scores["score_achieved"]
aggregated_scores["Score possible"] = aggregated_scores["score_possible"]
aggregated_scores["Score SD"] = aggregated_scores["score_sd"]
Expand Down Expand Up @@ -333,9 +331,9 @@ def write_individual_extraction_task_results(raw_results: pd.DataFrame) -> None:
axis=1,
)

aggregated_scores[
"Full model name"
] = aggregated_scores.index.get_level_values("model_name")
aggregated_scores["Full model name"] = (
aggregated_scores.index.get_level_values("model_name")
)
aggregated_scores["Subtask"] = aggregated_scores.index.get_level_values(
"subtask"
)
Expand Down Expand Up @@ -392,9 +390,9 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]):
)

overview_per_quantisation = overview
overview_per_quantisation[
"Full model name"
] = overview_per_quantisation.index
overview_per_quantisation["Full model name"] = (
overview_per_quantisation.index
)
overview_per_quantisation[
["Model name", "Size", "Version", "Quantisation"]
] = overview_per_quantisation["Full model name"].str.split(":", expand=True)
Expand Down Expand Up @@ -428,9 +426,9 @@ def create_overview_table(result_files_path: str, result_file_names: list[str]):
]
]
# round mean and sd to 2 decimal places
overview_per_quantisation.loc[
:, "Median Accuracy"
] = overview_per_quantisation["Median Accuracy"].round(2)
overview_per_quantisation.loc[:, "Median Accuracy"] = (
overview_per_quantisation["Median Accuracy"].round(2)
)
overview_per_quantisation.loc[:, "SD"] = overview_per_quantisation[
"SD"
].round(2)
Expand Down Expand Up @@ -878,9 +876,9 @@ def plot_extraction_tasks():
axis=1,
)

aggregated_scores[
"Full model name"
] = aggregated_scores.index.get_level_values("model_name")
aggregated_scores["Full model name"] = (
aggregated_scores.index.get_level_values("model_name")
)
aggregated_scores["Subtask"] = aggregated_scores.index.get_level_values(
"subtask"
)
Expand Down Expand Up @@ -1093,6 +1091,9 @@ def plot_comparison_naive_biochatter(overview):
)
]

# print number of rows of each task
print(overview_melted["Task"].value_counts())

sns.set_theme(style="whitegrid")
plt.figure(figsize=(6, 4))
sns.boxplot(
Expand All @@ -1118,6 +1119,56 @@ def plot_comparison_naive_biochatter(overview):
)
plt.close()

# plot strip plot (jittered scatter of individual accuracies per task)
plt.figure(figsize=(6, 4))
sns.stripplot(
x="Task",
y="Accuracy",
data=overview_melted,
jitter=0.2,
alpha=0.8,
)
plt.ylim(0, 1)
plt.xlabel(None)
plt.xticks(
ticks=range(len(overview_melted["Task"].unique())),
labels=["BioChatter", "Naive LLM (using full YAML schema)"],
)
plt.savefig(
"docs/images/scatter-naive-vs-biochatter.png",
bbox_inches="tight",
dpi=300,
)
plt.savefig(
"docs/images/scatter-naive-vs-biochatter.pdf",
bbox_inches="tight",
)
plt.close()

# plot violin plot
plt.figure(figsize=(6, 4))
sns.violinplot(
x="Task",
y="Accuracy",
data=overview_melted,
)
plt.ylim(0, 1)
plt.xlabel(None)
plt.xticks(
ticks=range(len(overview_melted["Task"].unique())),
labels=["BioChatter", "Naive LLM (using full YAML schema)"],
)
plt.savefig(
"docs/images/violin-naive-vs-biochatter.png",
bbox_inches="tight",
dpi=300,
)
plt.savefig(
"docs/images/violin-naive-vs-biochatter.pdf",
bbox_inches="tight",
)
plt.close()


def calculate_stats(overview):
overview_melted = melt_and_process(overview)
Expand Down

0 comments on commit 9296287

Please sign in to comment.