Skip to content

Commit e6c45b6

Browse files
committed
Add upload feature
1 parent 059d740 commit e6c45b6

File tree

5 files changed

+210
-13
lines changed

5 files changed

+210
-13
lines changed

.github/workflows/benchmark_v2.yml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,21 @@ on:
2828
required: false
2929
type: string
3030
default: ''
31+
upload_to_hf:
32+
description: 'HuggingFace Dataset to upload results to (e.g., "org/benchmark-results")'
33+
required: false
34+
type: string
35+
default: ''
36+
run_id:
37+
description: 'Custom run ID for organizing results (auto-generated if not provided)'
38+
required: false
39+
type: string
40+
default: ''
41+
benchmark_repo_id:
42+
description: 'HuggingFace Dataset repository ID to use as the benchmark results store (e.g., "org/benchmark-results")'
43+
required: false
44+
type: string
45+
default: ''
3146

3247
env:
3348
HF_HOME: /mnt/cache
@@ -93,6 +108,15 @@ jobs:
93108
args="$args --commit-id '${{ github.sha }}'"
94109
fi
95110
111+
# Add HuggingFace upload parameters if specified
112+
if [ -n "${{ inputs.upload_to_hf }}" ]; then
113+
args="$args --upload-to-hf '${{ inputs.upload_to_hf }}'"
114+
fi
115+
116+
if [ -n "${{ inputs.run_id }}" ]; then
117+
args="$args --run-id '${{ inputs.run_id }}'"
118+
fi
119+
96120
echo "benchmark_args=$args" >> $GITHUB_OUTPUT
97121
echo "Benchmark arguments: $args"
98122

.github/workflows/benchmark_v2_caller.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,7 @@ jobs:
5252
measurement_iterations: ${{ inputs.measurement_iterations || 5 }}
5353
num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }}
5454
commit_sha: ${{ github.sha }}
55+
upload_to_hf: ahadnagy/transformers-benchmarks
56+
run_id: ${{ github.run_id }}
57+
benchmark_repo_id: ahadnagy/transformers-benchmarks
5558
secrets: inherit

benchmark_v2/README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,36 @@ python run_benchmarks.py \
2121
--num-tokens-to-generate 200
2222
```
2323

24+
### Uploading Results to HuggingFace Dataset
25+
26+
You can automatically upload benchmark results to a HuggingFace Dataset for tracking and analysis:
27+
28+
```bash
29+
# Upload to a public dataset with auto-generated run ID
30+
python run_benchmarks.py --upload-to-hf username/benchmark-results
31+
32+
# Upload with a custom run ID for easy identification
33+
python run_benchmarks.py --upload-to-hf username/benchmark-results --run-id experiment_v1
34+
```
35+
36+
**Dataset Directory Structure:**
37+
```
38+
dataset_name/
39+
├── 2025-01-15/
40+
│ ├── runs/ # Non-scheduled runs (manual, PR, etc.)
41+
│ │ └── 123-1245151651/ # GitHub run number and ID
42+
│ │ └── benchmark_results/
43+
│ │ ├── benchmark_summary_20250115_143022.json
44+
│ │ └── model-name/
45+
│ │ └── model-name_benchmark_20250115_143022.json
46+
│ └── 124-1245151651/benchmark_results/ # Scheduled runs (daily CI): run ID directly under the date
47+
│ ├── benchmark_summary_20250115_143022.json
48+
│ └── model-name/
49+
│ └── model-name_benchmark_20250115_143022.json
50+
└── 2025-01-16/
51+
└── ...
52+
```
53+
2454
### Running Specific Benchmarks
2555

2656
```bash

benchmark_v2/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ psutil>=5.8.0
33
gpustat>=1.0.0
44
torch>=2.0.0
55
transformers>=4.30.0
6-
datasets>=2.10.0
6+
datasets>=2.10.0
7+
huggingface_hub>=0.16.0

benchmark_v2/run_benchmarks.py

Lines changed: 151 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import os
2525
import sys
2626
import json
27+
import uuid
2728
from datetime import datetime
2829
from pathlib import Path
2930
from typing import Dict, List, Any, Optional
@@ -94,6 +95,9 @@ def discover_benchmarks(benches_dir: str) -> List[Dict[str, Any]]:
9495
else:
9596
logging.warning(f"No runner function found in {py_file}")
9697

98+
except ImportError as e:
99+
logging.error(f"Failed to import {py_file}: Missing dependency - {e}")
100+
logging.error("Make sure to install: pip install -r requirements.txt")
97101
except Exception as e:
98102
logging.error(f"Failed to import {py_file}: {e}")
99103

@@ -168,7 +172,8 @@ def run_single_benchmark(
168172
def generate_summary_report(
169173
output_dir: str,
170174
benchmark_results: Dict[str, Any],
171-
logger: logging.Logger
175+
logger: logging.Logger,
176+
benchmark_run_uuid: Optional[str] = None
172177
) -> str:
173178
"""Generate a summary report of all benchmark runs."""
174179
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -177,6 +182,7 @@ def generate_summary_report(
177182
summary_data = {
178183
"run_metadata": {
179184
"timestamp": datetime.utcnow().isoformat(),
185+
"benchmark_run_uuid": benchmark_run_uuid,
180186
"total_benchmarks": len(benchmark_results),
181187
"successful_benchmarks": len([r for r in benchmark_results.values() if r is not None]),
182188
"failed_benchmarks": len([r for r in benchmark_results.values() if r is None])
@@ -192,10 +198,111 @@ def generate_summary_report(
192198
return summary_file
193199

194200

201+
def upload_results_to_hf_dataset(
    output_dir: str,
    summary_file: str,
    dataset_name: str,
    run_id: Optional[str] = None,
    logger: Optional[logging.Logger] = None,
) -> Optional[str]:
    """
    Upload benchmark results to a HuggingFace Dataset.
    Based on upload_collated_report() from utils/collated_reports.py

    Args:
        output_dir: Local output directory containing results; every file
            under it is uploaded, preserving the directory layout.
        summary_file: Path to the summary file. Kept for interface
            compatibility; it lives under output_dir and is uploaded with
            the rest, so it is not referenced separately here.
        dataset_name: Name of the HuggingFace dataset to upload to
        run_id: Unique run identifier. If None, one is derived from the
            GitHub Actions environment (run number + run ID), falling back
            to a short random ID so the repo path never contains "None".
        logger: Logger instance (defaults to the module logger)

    Returns:
        The run_id used for the upload, None if upload failed
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    import os
    import uuid

    from huggingface_hub import HfApi

    api = HfApi()

    if run_id is None:
        github_run_number = os.getenv("GITHUB_RUN_NUMBER")
        github_run_id = os.getenv("GITHUB_RUN_ID")
        if github_run_number and github_run_id:
            run_id = f"{github_run_number}-{github_run_id}"
        else:
            # Outside CI there is no GitHub metadata; generate a short
            # random ID instead of letting "None" leak into the repo path.
            run_id = str(uuid.uuid4())[:8]

    date_folder = datetime.now().strftime("%Y-%m-%d")

    github_event_name = os.getenv("GITHUB_EVENT_NAME")
    if github_event_name != "schedule":
        # Non-scheduled runs go under a runs subfolder
        repo_path = f"{date_folder}/runs/{run_id}/benchmark_results"
    else:
        # Scheduled runs go directly under the date
        repo_path = f"{date_folder}/{run_id}/benchmark_results"

    logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'")

    try:
        # Get the authentication token (prioritize specific token, fallback to HF_TOKEN)
        token = os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN") or os.getenv("HF_TOKEN")

        # Upload all files in the output directory
        from pathlib import Path
        output_path = Path(output_dir)

        for file_path in output_path.rglob("*"):
            if file_path.is_file():
                # Calculate relative path from output_dir so the dataset
                # mirrors the local directory structure
                relative_path = file_path.relative_to(output_path)
                path_in_repo = f"{repo_path}/{relative_path}"

                logger.debug(f"Uploading {file_path} to {path_in_repo}")

                api.upload_file(
                    path_or_fileobj=str(file_path),
                    path_in_repo=path_in_repo,
                    repo_id=dataset_name,
                    repo_type="dataset",
                    token=token,
                    commit_message=f"Upload benchmark results for run {run_id}"
                )

        logger.info(f"Successfully uploaded results to: https://huggingface.co/datasets/{dataset_name}/tree/main/{repo_path}")

        return run_id

    except Exception as upload_error:
        logger.error(f"Failed to upload results: {upload_error}")
        import traceback
        logger.debug(traceback.format_exc())
        return None
282+
283+
195284
def main():
196285
"""Main entry point for the benchmarking script."""
286+
# Generate a unique UUID for this benchmark run
287+
benchmark_run_uuid = str(uuid.uuid4())[:8]
288+
197289
parser = argparse.ArgumentParser(
198-
description="Run all benchmarks in the ./benches directory"
290+
description="Run all benchmarks in the ./benches directory",
291+
epilog="""
292+
Examples:
293+
# Run all available benchmarks
294+
python3 run_benchmarks.py
295+
296+
# Run with specific model and upload to HuggingFace Dataset
297+
python3 run_benchmarks.py --model-id meta-llama/Llama-2-7b-hf --upload-to-hf username/benchmark-results
298+
299+
# Run with custom run ID and upload to HuggingFace Dataset
300+
python3 run_benchmarks.py --run-id experiment_v1 --upload-to-hf org/benchmarks
301+
302+
# Run only specific benchmarks with file logging
303+
python3 run_benchmarks.py --include llama --enable-file-logging
304+
""",
305+
formatter_class=argparse.RawDescriptionHelpFormatter
199306
)
200307

201308
parser.add_argument(
@@ -261,12 +368,6 @@ def main():
261368
help="Exclude benchmarks matching these names"
262369
)
263370

264-
parser.add_argument(
265-
"--enable-mock",
266-
action="store_true",
267-
help="Enable mock benchmark (skipped by default)"
268-
)
269-
270371
parser.add_argument(
271372
"--enable-file-logging",
272373
action="store_true",
@@ -279,12 +380,25 @@ def main():
279380
help="Git commit ID for metadata (if not provided, will auto-detect from git)"
280381
)
281382

383+
parser.add_argument(
384+
"--upload-to-hf",
385+
type=str,
386+
help="Upload results to HuggingFace Dataset (provide dataset name, e.g., 'username/benchmark-results')"
387+
)
388+
389+
parser.add_argument(
390+
"--run-id",
391+
type=str,
392+
help="Custom run ID for organizing results (if not provided, will generate a unique ID)"
393+
)
394+
282395
args = parser.parse_args()
283396

284397
# Setup logging
285398
logger = setup_logging(args.log_level, args.enable_file_logging)
286399

287400
logger.info("Starting benchmark discovery and execution")
401+
logger.info(f"Benchmark run UUID: {benchmark_run_uuid}")
288402
logger.info(f"Output directory: {args.output_dir}")
289403
logger.info(f"Benches directory: {args.benches_dir}")
290404

@@ -327,9 +441,6 @@ def main():
327441
if args.model_id:
328442
benchmark_kwargs['model_id'] = args.model_id
329443

330-
# Add enable_mock flag for mock benchmark
331-
benchmark_kwargs['enable_mock'] = args.enable_mock
332-
333444
# Add commit_id if provided
334445
if args.commit_id:
335446
benchmark_kwargs['commit_id'] = args.commit_id
@@ -352,7 +463,27 @@ def main():
352463
successful_count += 1
353464

354465
# Generate summary report
355-
summary_file = generate_summary_report(args.output_dir, benchmark_results, logger)
466+
summary_file = generate_summary_report(args.output_dir, benchmark_results, logger, benchmark_run_uuid)
467+
468+
# Upload results to HuggingFace Dataset if requested
469+
upload_run_id = None
470+
if args.upload_to_hf:
471+
logger.info("=" * 60)
472+
logger.info("UPLOADING TO HUGGINGFACE DATASET")
473+
logger.info("=" * 60)
474+
# Use provided run_id or fallback to benchmark run UUID
475+
effective_run_id = args.run_id or benchmark_run_uuid
476+
upload_run_id = upload_results_to_hf_dataset(
477+
output_dir=args.output_dir,
478+
summary_file=summary_file,
479+
dataset_name=args.upload_to_hf,
480+
run_id=effective_run_id,
481+
logger=logger
482+
)
483+
if upload_run_id:
484+
logger.info(f"Upload completed with run ID: {upload_run_id}")
485+
else:
486+
logger.warning("Upload failed - continuing with local results")
356487

357488
# Final summary
358489
total_benchmarks = len(filtered_benchmarks)
@@ -367,6 +498,14 @@ def main():
367498
logger.info(f"Output directory: {args.output_dir}")
368499
logger.info(f"Summary report: {summary_file}")
369500

501+
if args.upload_to_hf:
502+
if upload_run_id:
503+
logger.info(f"HuggingFace Dataset: {args.upload_to_hf}")
504+
logger.info(f"Run ID: {upload_run_id}")
505+
logger.info(f"View results: https://huggingface.co/datasets/{args.upload_to_hf}/tree/main/{datetime.now().strftime('%Y-%m-%d')}/runs/{upload_run_id}")
506+
else:
507+
logger.warning("Upload to HuggingFace Dataset failed")
508+
370509
if failed_count > 0:
371510
logger.warning(f"{failed_count} benchmark(s) failed. Check logs for details.")
372511
return 1

0 commit comments

Comments
 (0)