change error message and make output prettier
drisspg committed Jun 26, 2024
1 parent f4b50b4 commit 5ffe391
Showing 1 changed file with 19 additions and 14 deletions.
33 changes: 19 additions & 14 deletions scripts/hf_eval.py
@@ -1,4 +1,5 @@
import torch
+from tabulate import tabulate

from transformers import AutoModelForCausalLM, AutoTokenizer
try:
@@ -9,17 +10,7 @@
print("""
Error: The 'lm_eval' module was not found.
To install, follow these steps:
1. Clone the repository:
git clone https://github.com/EleutherAI/lm-evaluation-harness
2. Change to the cloned directory:
cd lm-evaluation-harness
3. Install the package in editable mode:
pip install -e .
After installation, re-run this script to use the LM Evaluation Harness.
pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git
""")
raise # Re-raise the ImportError

@@ -33,6 +24,21 @@
torch._inductor.config.force_fuse_int_mm_with_mul = True
torch._inductor.config.fx_graph_cache = True

+def pretty_print_nested_results(results, precision: int = 6):
+    def format_value(value):
+        if isinstance(value, float):
+            return f"{value:.{precision}f}"
+        return value
+
+    main_table = []
+    for task, metrics in results["results"].items():
+        subtable = [[k, format_value(v)] for k, v in metrics.items() if k != 'alias']
+        subtable.sort(key=lambda x: x[0])  # Sort metrics alphabetically
+        formatted_subtable = tabulate(subtable, tablefmt='grid')
+        main_table.append([task, formatted_subtable])
+
+    print(tabulate(main_table, headers=['Task', 'Metrics'], tablefmt='grid'))

def run_evaluation(repo_id, tasks, limit, device, precision, quantization, compile, batch_size, max_length):

    tokenizer = AutoTokenizer.from_pretrained(repo_id)
@@ -50,7 +56,6 @@ def run_evaluation(repo_id, tasks, limit, device, precision, quantization, compi
        change_linear_weights_to_int4_woqtensors(model.to(device=device))
    elif quantization == "autoquant":
        model = autoquant(model.to(device=device))
-
    with torch.no_grad():
        result = evaluate(
            HFLM(
@@ -61,8 +66,8 @@ def run_evaluation(repo_id, tasks, limit, device, precision, quantization, compi
            get_task_dict(tasks),
            limit = limit,
        )
-    for task, res in result["results"].items():
-        print(f"{task}: {res}")

+    pretty_print_nested_results(result)


if __name__ == '__main__':
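
For reference, below is a minimal standalone sketch of what the new pretty_print_nested_results helper produces. The helper body is copied from the diff above; fake_results is a hypothetical stand-in for the nested {"results": {task: {metric: value}}} dictionary that lm_eval's evaluate() returns, so the task and metric names here are illustrative only.

# Standalone sketch; requires `pip install tabulate`.
# pretty_print_nested_results is copied from the diff above; fake_results is a
# made-up stand-in for lm_eval's evaluate() output (same nesting, illustrative names).
from tabulate import tabulate

def pretty_print_nested_results(results, precision: int = 6):
    def format_value(value):
        # Round floats to a fixed precision; leave other values untouched.
        if isinstance(value, float):
            return f"{value:.{precision}f}"
        return value

    main_table = []
    for task, metrics in results["results"].items():
        # One inner grid table per task, skipping the 'alias' entry.
        subtable = [[k, format_value(v)] for k, v in metrics.items() if k != 'alias']
        subtable.sort(key=lambda x: x[0])  # Sort metrics alphabetically
        formatted_subtable = tabulate(subtable, tablefmt='grid')
        main_table.append([task, formatted_subtable])

    # Outer grid table: one row per task, inner table embedded as a multiline cell.
    print(tabulate(main_table, headers=['Task', 'Metrics'], tablefmt='grid'))

if __name__ == "__main__":
    fake_results = {
        "results": {
            "wikitext": {"alias": "wikitext", "word_perplexity,none": 9.123456789},
            "hellaswag": {"alias": "hellaswag", "acc,none": 0.579, "acc_norm,none": 0.7561234},
        }
    }
    pretty_print_nested_results(fake_results)

With tablefmt='grid', tabulate renders each per-task inner table as a multiline cell inside the outer Task/Metrics table, which is what makes the output easier to scan than the old one-line-per-task print.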
