issue #14: add top results table
ibrahim-kabir committed Apr 3, 2024
1 parent 4b3c473 commit 9a5838b
Showing 1 changed file with 42 additions and 22 deletions.
64 changes: 42 additions & 22 deletions finesse/accuracy_functions.py
@@ -50,16 +50,36 @@ def calculate_accuracy(responses_url: list[str], expected_url: list | str) -> Ac
 
     return AccuracyResult(position, total_pages, score)
 
+def count_top_results(test_data, num_results, accuracy_type):
+    """
+    Counts the number of correct URLs in the top results based on the specified accuracy type.
+
+    Args:
+        test_data (dict): A dictionary containing the test data.
+        num_results (int): The number of top results to consider.
+        accuracy_type (str): The type of accuracy to consider, e.g. "accuracy", "bing_accuracy", or "bing_filtered_accuracy".
+
+    Returns:
+        int: The count of correct URLs in the top results.
+    """
+    count = 0
+    for value in test_data.values():
+        # Default to 0.0 so questions with a missing score never count as a hit.
+        accuracy = value.get(accuracy_type, 0.0)
+        if accuracy > 1.0 - (num_results / 100):  # score high enough to rank within the top num_results
+            count += 1
+    return count
+
 def save_to_markdown(test_data: dict, engine: str):
 
     if not os.path.exists(OUTPUT_FOLDER):
         os.makedirs(OUTPUT_FOLDER)
     date_string = datetime.datetime.now().strftime("%Y-%m-%d")
     file_name = f"test_{engine}_{date_string}.md"
     output_file = os.path.join(OUTPUT_FOLDER, file_name)
     with open(output_file, "w") as md_file:
-        md_file.write(f"# Test on the {engine} search engine: {date_string}\n\n")
+        md_file.write(f"# Test on the {engine.title()} search engine: {date_string}\n\n")
         md_file.write("## Test data table\n\n")
-        md_file.write("| 📄 File | 💬 Question| 🔎 Finesse Accuracy Score | 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score |⌛ Finesse Time | ⌛ Bing Time | ⌛ Filtered Bing Time |\n")
+        md_file.write(f"| 📄 File | 💬 Question | 🔎 {engine.title()} Accuracy Score | 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score | ⌛ {engine.title()} Time | ⌛ Bing Time | ⌛ Filtered Bing Time |\n")
         md_file.write("|---|---|---|---|---|---|---|---|\n")
         for key, value in test_data.items():
             question = ""
@@ -76,35 +96,35 @@ def save_to_markdown(test_data: dict, engine: str):
 
         time_stats, accuracy_stats, bing_accuracy_stats, bing_time_stats, bing_filtered_accuracy_stats, bing_filtered_time_stats = calculate_statistical_summary(test_data)
         md_file.write("## Statistical summary\n\n")
-        md_file.write("| Statistic\Engine | 🔎 Finesse Accuracy score| 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score |⌛ Finesse Time | ⌛ Bing Time | ⌛ Filtered Bing Time |\n")
+        md_file.write(f"| Statistic\\Engine | 🔎 {engine.title()} Accuracy Score | 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score | ⌛ {engine.title()} Time | ⌛ Bing Time | ⌛ Filtered Bing Time |\n")
         md_file.write("|---|---|---|---|---|---|---|\n")
         for stat in ["Mean", "Median", "Standard Deviation", "Maximum", "Minimum"]:
             md_file.write(f"|{stat}| {accuracy_stats.get(stat)}% | {bing_accuracy_stats.get(stat)}% | {bing_filtered_accuracy_stats.get(stat)}% |{time_stats.get(stat)}ms | {bing_time_stats.get(stat)}ms | {bing_filtered_time_stats.get(stat)}ms |\n")
 
-        md_file.write("\n## Count of null and top scores\n\n")
-        md_file.write("| Score\Engine | 🔎 Finesse Accuracy score| 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score |\n")
+        md_file.write("\n## Count of top results\n\n")
+        md_file.write(f"| Count\\Engine | 🔎 {engine.title()} Accuracy Score | 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score |\n")
         md_file.write("|---|---|---|---|\n")
-        finesse_null, finesse_top = count_null_top_scores({key: value.get("accuracy") for key, value in test_data.items()})
-        bing_null, bing_top = count_null_top_scores({key: value.get("bing_accuracy") for key, value in test_data.items()})
-        bing_filtered_null, bing_filtered_top = count_null_top_scores({key: value.get("bing_filtered_accuracy") for key, value in test_data.items()})
-        md_file.write(f"| Null (0%) | {finesse_null} | {bing_null} |{bing_filtered_null} |\n")
-        md_file.write(f"| Top (100%)| {finesse_top} | {bing_top} |{bing_filtered_top} |\n")
+        finesse_top_1 = count_top_results(test_data, 1, "accuracy")
+        bing_top_1 = count_top_results(test_data, 1, "bing_accuracy")
+        bing_filtered_top_1 = count_top_results(test_data, 1, "bing_filtered_accuracy")
+        md_file.write(f"| 🏆 Top 1 | {finesse_top_1} | {bing_top_1} | {bing_filtered_top_1} |\n")
+
+        finesse_top_3 = count_top_results(test_data, 3, "accuracy")
+        bing_top_3 = count_top_results(test_data, 3, "bing_accuracy")
+        bing_filtered_top_3 = count_top_results(test_data, 3, "bing_filtered_accuracy")
+        md_file.write(f"| ✅ Top 3 | {finesse_top_3} | {bing_top_3} | {bing_filtered_top_3} |\n")
+
+        finesse_top_5 = count_top_results(test_data, 5, "accuracy")
+        bing_top_5 = count_top_results(test_data, 5, "bing_accuracy")
+        bing_filtered_top_5 = count_top_results(test_data, 5, "bing_filtered_accuracy")
+        md_file.write(f"| ✅ Top 5 | {finesse_top_5} | {bing_top_5} | {bing_filtered_top_5} |\n")
+
+        finesse_top_10 = count_top_results(test_data, 10, "accuracy")
+        bing_top_10 = count_top_results(test_data, 10, "bing_accuracy")
+        bing_filtered_top_10 = count_top_results(test_data, 10, "bing_filtered_accuracy")
+        md_file.write(f"| ✅ Top 10 | {finesse_top_10} | {bing_top_10} | {bing_filtered_top_10} |\n")
+        md_file.write(f"| ❌ Not in top 10 | {len(test_data) - finesse_top_10} | {len(test_data) - bing_top_10} | {len(test_data) - bing_filtered_top_10} |\n")
 
-def count_null_top_scores(accuracy_scores: dict):
-    """
-    Counts the number of null scores and top scores in the given accuracy_scores dictionary.
-
-    Args:
-        accuracy_scores (dict): A dictionary containing accuracy scores.
-
-    Returns:
-        tuple: A tuple containing the count of null scores and top scores, respectively.
-    """
-    null_scores = len([score for score in accuracy_scores.values() if score == 0])
-    top_scores = len([score for score in accuracy_scores.values() if score == 1])
-
-    return null_scores, top_scores
 
 def save_to_csv(test_data: dict, engine: str):
     if not os.path.exists(OUTPUT_FOLDER):
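Taken together, these writes replace the old null/top counts with a ranked breakdown. For a run with engine="finesse", the new section would render roughly like the following sketch (the counts are invented for illustration and assume a run of 20 questions):

    ## Count of top results

    | Count\Engine | 🔎 Finesse Accuracy Score | 🌐 Bing Accuracy Score | 🌐 Filtered Bing Accuracy Score |
    |---|---|---|---|
    | 🏆 Top 1 | 8 | 5 | 6 |
    | ✅ Top 3 | 12 | 9 | 10 |
    | ✅ Top 5 | 15 | 11 | 13 |
    | ✅ Top 10 | 17 | 14 | 15 |
    | ❌ Not in top 10 | 3 | 6 | 5 |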