 import os
 import sys
 import json
+import uuid
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Any, Optional
@@ -94,6 +95,9 @@ def discover_benchmarks(benches_dir: str) -> List[Dict[str, Any]]:
             else:
                 logging.warning(f"No runner function found in {py_file}")

+        except ImportError as e:
+            logging.error(f"Failed to import {py_file}: missing dependency - {e}")
+            logging.error("Make sure to install: pip install -r requirements.txt")
         except Exception as e:
             logging.error(f"Failed to import {py_file}: {e}")
@@ -168,7 +172,8 @@ def run_single_benchmark(
 def generate_summary_report(
     output_dir: str,
     benchmark_results: Dict[str, Any],
-    logger: logging.Logger
+    logger: logging.Logger,
+    benchmark_run_uuid: Optional[str] = None
 ) -> str:
     """Generate a summary report of all benchmark runs."""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -177,6 +182,7 @@ def generate_summary_report(
     summary_data = {
         "run_metadata": {
             "timestamp": datetime.utcnow().isoformat(),
+            "benchmark_run_uuid": benchmark_run_uuid,
             "total_benchmarks": len(benchmark_results),
             "successful_benchmarks": len([r for r in benchmark_results.values() if r is not None]),
             "failed_benchmarks": len([r for r in benchmark_results.values() if r is None])
@@ -192,10 +198,111 @@ def generate_summary_report(
     return summary_file


+def upload_results_to_hf_dataset(
+    output_dir: str,
+    summary_file: str,
+    dataset_name: str,
+    run_id: Optional[str] = None,
+    logger: Optional[logging.Logger] = None
+) -> Optional[str]:
+    """
+    Upload benchmark results to a HuggingFace Dataset.
+    Based on upload_collated_report() from utils/collated_reports.py
+
+    Args:
+        output_dir: Local output directory containing results
+        summary_file: Path to the summary file
+        dataset_name: Name of the HuggingFace dataset to upload to
+        run_id: Unique run identifier (if None, one is derived from the
+            GitHub Actions environment or generated)
+        logger: Logger instance
+
+    Returns:
+        The run_id used for the upload, or None if the upload failed
+    """
+    if logger is None:
+        logger = logging.getLogger(__name__)
+
+    # Imported lazily so huggingface_hub is only required when uploading
+    from huggingface_hub import HfApi
+
+    api = HfApi()
+
+    if run_id is None:
+        github_run_number = os.getenv("GITHUB_RUN_NUMBER")
+        github_run_id = os.getenv("GITHUB_RUN_ID")
+        if github_run_number and github_run_id:
+            run_id = f"{github_run_number}-{github_run_id}"
+        else:
+            # Fall back to a generated ID so the upload path never contains "None"
+            run_id = str(uuid.uuid4())[:8]
+
+    date_folder = datetime.now().strftime("%Y-%m-%d")
+
+    github_event_name = os.getenv("GITHUB_EVENT_NAME")
+    if github_event_name != "schedule":
+        # Non-scheduled runs go under a runs subfolder
+        repo_path = f"{date_folder}/runs/{run_id}/benchmark_results"
+    else:
+        # Scheduled runs go directly under the date
+        repo_path = f"{date_folder}/{run_id}/benchmark_results"
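+    # Illustrative example (values assumed): with run_id "123-456789" on
+    # 2024-05-01, a scheduled run uploads under
+    # "2024-05-01/123-456789/benchmark_results", while any other trigger
+    # uploads under "2024-05-01/runs/123-456789/benchmark_results".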
+
+    logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'")
+
+    try:
+        # Get the authentication token (prioritize specific token, fall back to HF_TOKEN)
+        token = os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN") or os.getenv("HF_TOKEN")
+
+        # Upload all files in the output directory
+        output_path = Path(output_dir)
+
+        for file_path in output_path.rglob("*"):
+            if file_path.is_file():
+                # Calculate the path relative to output_dir
+                relative_path = file_path.relative_to(output_path)
+                path_in_repo = f"{repo_path}/{relative_path}"
+
+                logger.debug(f"Uploading {file_path} to {path_in_repo}")
+
+                api.upload_file(
+                    path_or_fileobj=str(file_path),
+                    path_in_repo=path_in_repo,
+                    repo_id=dataset_name,
+                    repo_type="dataset",
+                    token=token,
+                    commit_message=f"Upload benchmark results for run {run_id}"
+                )
+
+        logger.info(f"Successfully uploaded results to: https://huggingface.co/datasets/{dataset_name}/tree/main/{repo_path}")
+
+        return run_id
+
+    except Exception as upload_error:
+        logger.error(f"Failed to upload results: {upload_error}")
+        import traceback
+        logger.debug(traceback.format_exc())
+        return None
+
+
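+# Usage sketch (illustrative only - the dataset name and file names below are
+# placeholders, not part of this PR):
+#
+#     run_id = upload_results_to_hf_dataset(
+#         output_dir="benchmark_results",
+#         summary_file="benchmark_results/summary.json",
+#         dataset_name="username/benchmark-results",
+#         run_id="experiment_v1",
+#     )
+#     # returns "experiment_v1" on success, or None if the upload failed
+
+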
 def main():
     """Main entry point for the benchmarking script."""
+    # Generate a unique UUID for this benchmark run
+    benchmark_run_uuid = str(uuid.uuid4())[:8]
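+    # e.g. "3f2a1b9c": the first 8 hex characters of a UUID4, compact enough
+    # for folder names while remaining unlikely to collide between runs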
+
     parser = argparse.ArgumentParser(
-        description="Run all benchmarks in the ./benches directory"
+        description="Run all benchmarks in the ./benches directory",
+        epilog="""
+Examples:
+  # Run all available benchmarks
+  python3 run_benchmarks.py
+
+  # Run with a specific model and upload to a HuggingFace Dataset
+  python3 run_benchmarks.py --model-id meta-llama/Llama-2-7b-hf --upload-to-hf username/benchmark-results
+
+  # Run with a custom run ID and upload to a HuggingFace Dataset
+  python3 run_benchmarks.py --run-id experiment_v1 --upload-to-hf org/benchmarks
+
+  # Run only specific benchmarks with file logging
+  python3 run_benchmarks.py --include llama --enable-file-logging
+        """,
+        formatter_class=argparse.RawDescriptionHelpFormatter
     )

     parser.add_argument(
@@ -261,12 +368,6 @@ def main():
         help="Exclude benchmarks matching these names"
     )

-    parser.add_argument(
-        "--enable-mock",
-        action="store_true",
-        help="Enable mock benchmark (skipped by default)"
-    )
-
     parser.add_argument(
         "--enable-file-logging",
         action="store_true",
@@ -279,12 +380,25 @@ def main():
         help="Git commit ID for metadata (if not provided, will auto-detect from git)"
     )

+    parser.add_argument(
+        "--upload-to-hf",
+        type=str,
+        help="Upload results to a HuggingFace Dataset (provide the dataset name, e.g. 'username/benchmark-results')"
+    )
+
+    parser.add_argument(
+        "--run-id",
+        type=str,
+        help="Custom run ID for organizing results (if not provided, a unique ID is generated)"
+    )
+
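+    # Note: argparse maps "--upload-to-hf" to args.upload_to_hf and "--run-id"
+    # to args.run_id, which is how the options are read later in main().
+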
     args = parser.parse_args()

     # Setup logging
     logger = setup_logging(args.log_level, args.enable_file_logging)

     logger.info("Starting benchmark discovery and execution")
+    logger.info(f"Benchmark run UUID: {benchmark_run_uuid}")
     logger.info(f"Output directory: {args.output_dir}")
     logger.info(f"Benches directory: {args.benches_dir}")

@@ -327,9 +441,6 @@ def main():
     if args.model_id:
         benchmark_kwargs['model_id'] = args.model_id

-    # Add enable_mock flag for mock benchmark
-    benchmark_kwargs['enable_mock'] = args.enable_mock
-
     # Add commit_id if provided
     if args.commit_id:
         benchmark_kwargs['commit_id'] = args.commit_id
@@ -352,7 +463,27 @@ def main():
             successful_count += 1

     # Generate summary report
-    summary_file = generate_summary_report(args.output_dir, benchmark_results, logger)
+    summary_file = generate_summary_report(args.output_dir, benchmark_results, logger, benchmark_run_uuid)
+
+    # Upload results to HuggingFace Dataset if requested
+    upload_run_id = None
+    if args.upload_to_hf:
+        logger.info("=" * 60)
+        logger.info("UPLOADING TO HUGGINGFACE DATASET")
+        logger.info("=" * 60)
+        # Use the provided run_id, falling back to the benchmark run UUID
+        effective_run_id = args.run_id or benchmark_run_uuid
+        upload_run_id = upload_results_to_hf_dataset(
+            output_dir=args.output_dir,
+            summary_file=summary_file,
+            dataset_name=args.upload_to_hf,
+            run_id=effective_run_id,
+            logger=logger
+        )
+        if upload_run_id:
+            logger.info(f"Upload completed with run ID: {upload_run_id}")
+        else:
+            logger.warning("Upload failed - continuing with local results")

     # Final summary
     total_benchmarks = len(filtered_benchmarks)
@@ -367,6 +498,14 @@ def main():
     logger.info(f"Output directory: {args.output_dir}")
     logger.info(f"Summary report: {summary_file}")

+    if args.upload_to_hf:
+        if upload_run_id:
+            logger.info(f"HuggingFace Dataset: {args.upload_to_hf}")
+            logger.info(f"Run ID: {upload_run_id}")
+            date_folder = datetime.now().strftime("%Y-%m-%d")
+            # Mirror the folder layout chosen in upload_results_to_hf_dataset()
+            if os.getenv("GITHUB_EVENT_NAME") == "schedule":
+                results_path = f"{date_folder}/{upload_run_id}"
+            else:
+                results_path = f"{date_folder}/runs/{upload_run_id}"
+            logger.info(f"View results: https://huggingface.co/datasets/{args.upload_to_hf}/tree/main/{results_path}")
+        else:
+            logger.warning("Upload to HuggingFace Dataset failed")
+
     if failed_count > 0:
         logger.warning(f"{failed_count} benchmark(s) failed. Check logs for details.")
         return 1