diff --git a/backend/tests/evaluation/compare_scores.py b/backend/tests/evaluation/compare_scores.py
index 0c06e53..efc8b89 100644
--- a/backend/tests/evaluation/compare_scores.py
+++ b/backend/tests/evaluation/compare_scores.py
@@ -1,4 +1,5 @@
 import sys
+from pathlib import Path
 
 import pandas as pd
 
@@ -25,6 +26,23 @@ def read_markdown_table(file_path):
     return df
 
 
+def get_emoji(difference):
+    """Map a percentage score difference to a status emoji.
+
+    Changes under 1% in magnitude count as unchanged; beyond 5% either way
+    they are flagged as significant.
+    """
+    if pd.isna(difference):
+        return "❓"  # Difference could not be computed (e.g. a score is missing)
+    if abs(difference) < 1:
+        return "✅"  # Green checkmark for small differences
+    if difference > 0:
+        # Rocket for significant improvements, upward chart otherwise
+        return "🚀" if difference > 5 else "📈"
+    # Warning sign for significant regressions, downward chart otherwise
+    return "⚠️" if difference < -5 else "📉"
+
+
 def compare_scores(current_scores_path, main_scores_path):
     # Read current scores
     try:
@@ -59,15 +77,31 @@ def compare_scores(current_scores_path, main_scores_path):
         (merged_df["Score_current"] - merged_df["Score_main"]) / merged_df["Score_main"] * 100
     ).round(2)
 
+    # Add emoji column
+    merged_df["Emoji"] = merged_df["Difference"].apply(get_emoji)
+
     # Prepare output dataframe
-    output_df = merged_df[["Tag", "Score_current", "Score_main", "Difference"]]
-    output_df.columns = ["Tag", "Current Score", "Main Score", "Difference (%)"]
+    output_df = merged_df[["Tag", "Score_current", "Score_main", "Difference", "Emoji"]]
+    output_df.columns = ["Tag", "PR Score", "Main Branch Score", "Difference (%)", "Status"]
     output_df = output_df.fillna("N/A")
 
+    def format_cell(value, spec, suffix=""):
+        # fillna() above replaces missing values with the string "N/A", which
+        # rejects float format specs; pass such cells through unchanged.
+        return value if isinstance(value, str) else f"{value:{spec}}{suffix}"
+
     # Write to markdown file
-    with open("comparison.md", "w") as f:
+    # Explicit UTF-8 so the status emoji do not depend on the locale encoding
+    with Path("comparison.md").open("w", encoding="utf-8") as f:
         f.write("## LLM Evaluation Score Comparison\n\n")
-        output_df.to_markdown(index=False, buf=f, mode="w")
+        f.write("| Tag | PR Score | Main Branch Score | Difference (%) | Status |\n")
+        f.write("|-----|----------|-------------------|----------------|--------|\n")
+        for _, row in output_df.iterrows():
+            f.write(
+                f"| {row['Tag']} | {format_cell(row['PR Score'], '.2f')} | "
+                f"{format_cell(row['Main Branch Score'], '.2f')} | "
+                f"{format_cell(row['Difference (%)'], '+.2f', '%')} | {row['Status']} |\n"
+            )
 
 
 if __name__ == "__main__":