From 37be20fff4572d00de155af91fae4f3be4b34503 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 5 Sep 2025 11:50:49 +0000 Subject: [PATCH 01/31] WIP benchmark v2 workflow --- .github/workflows/benchmark_v2.yml | 111 ++++++++++++++++++++++ .github/workflows/benchmark_v2_caller.yml | 64 +++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 .github/workflows/benchmark_v2.yml create mode 100644 .github/workflows/benchmark_v2_caller.yml diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml new file mode 100644 index 000000000000..6d10147df27a --- /dev/null +++ b/.github/workflows/benchmark_v2.yml @@ -0,0 +1,111 @@ +name: Benchmark v2 Framework + +on: + workflow_call: + inputs: + model_id: + description: 'Model ID to benchmark (e.g., meta-llama/Llama-2-7b-hf)' + required: false + type: string + default: '' + warmup_iterations: + description: 'Number of warmup iterations' + required: false + type: number + default: 3 + measurement_iterations: + description: 'Number of measurement iterations' + required: false + type: number + default: 5 + num_tokens_to_generate: + description: 'Number of tokens to generate' + required: false + type: number + default: 100 + commit_sha: + description: 'Commit SHA to benchmark' + required: false + type: string + default: '' + +env: + HF_HOME: /mnt/cache + TRANSFORMERS_IS_CI: yes + # For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. + # This token is created under the bot `hf-transformers-bot`. + HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} + +jobs: + benchmark-v2: + name: Benchmark v2 + strategy: + matrix: + # Use GPU-enabled runners for accurate benchmarking + group: [aws-g5-4xlarge-cache] + runs-on: + group: ${{ matrix.group }} + container: + image: huggingface/transformers-pytorch-gpu + options: --gpus all --privileged --ipc host --shm-size "16gb" + steps: + - name: Get repo + uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit_sha || github.sha }} + + - name: Update clone + if: inputs.commit_sha + run: | + git fetch && git checkout ${{ inputs.commit_sha }} + + - name: Install benchmark dependencies + working-directory: benchmark_v2 + run: | + python3 -m pip install -r requirements.txt + + - name: Reinstall transformers in edit mode + run: | + python3 -m pip uninstall -y transformers + python3 -m pip install -e ".[torch]" + + - name: Show installed libraries and their versions + run: | + python3 -m pip list + python3 -c "import torch; print(f'PyTorch version: {torch.__version__}')" + python3 -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')" + python3 -c "import torch; print(f'CUDA device count: {torch.cuda.device_count()}')" || true + nvidia-smi || true + + - name: Prepare benchmark arguments + id: prepare-args + run: | + args="--log-level INFO" + + # Add model ID if specified + if [ -n "${{ inputs.model_id }}" ]; then + args="$args --model-id '${{ inputs.model_id }}'" + fi + + # Add iterations + args="$args --warmup-iterations ${{ inputs.warmup_iterations }}" + args="$args --measurement-iterations ${{ inputs.measurement_iterations }}" + args="$args --num-tokens-to-generate ${{ inputs.num_tokens_to_generate }}" + + # Add commit ID if available + if [ -n "${{ inputs.commit_sha }}" ]; then + args="$args --commit-id '${{ inputs.commit_sha }}'" + elif [ -n "${{ github.sha }}" ]; then + args="$args --commit-id '${{ github.sha }}'" + fi + + echo "benchmark_args=$args" >> $GITHUB_OUTPUT + echo "Benchmark arguments: $args" + + - name: Run benchmark v2 + working-directory: benchmark_v2 + run: | + echo "Running benchmark with args: ${{ steps.prepare-args.outputs.benchmark_args }}" + python3 run_benchmarks.py ${{ steps.prepare-args.outputs.benchmark_args }} + env: + HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml new file mode 100644 index 000000000000..521cea5a0f3a --- /dev/null +++ b/.github/workflows/benchmark_v2_caller.yml @@ -0,0 +1,64 @@ +name: Benchmark v2 Scheduled Runner + +on: + schedule: + # Run daily at 2:30 AM UTC + - cron: "30 2 * * *" + push: + branches: + - run_nvidia_benchmark* + workflow_dispatch: + inputs: + model_id: + description: 'Model ID to benchmark (leave empty for default models)' + required: false + type: string + default: '' + warmup_iterations: + description: 'Number of warmup iterations' + required: false + type: number + default: 3 + measurement_iterations: + description: 'Number of measurement iterations' + required: false + type: number + default: 5 + num_tokens_to_generate: + description: 'Number of tokens to generate' + required: false + type: number + default: 100 + include_benchmarks: + description: 'Benchmarks to include (comma-separated, e.g., "llama")' + required: false + type: string + default: '' + exclude_benchmarks: + description: 'Benchmarks to exclude (comma-separated)' + required: false + type: string + default: '' + enable_file_logging: + description: 'Enable file logging' + required: false + type: boolean + default: false + repository_dispatch: + types: [benchmark_v2_trigger] + +jobs: + benchmark-v2-default: + name: Benchmark v2 - Default Models + uses: ./.github/workflows/benchmark_v2.yml + with: + model_id: ${{ inputs.model_id || '' }} + warmup_iterations: ${{ inputs.warmup_iterations || 3 }} + measurement_iterations: ${{ inputs.measurement_iterations || 5 }} + num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }} + include_benchmarks: ${{ inputs.include_benchmarks || '' }} + exclude_benchmarks: ${{ inputs.exclude_benchmarks || '' }} + enable_file_logging: ${{ inputs.enable_file_logging || false }} + slack_report_channel: '#transformers-ci-benchmark-v2' + commit_sha: ${{ github.sha }} + secrets: inherit \ No newline at end of file From d0231bff1523a6c997b2cbea35e2a241716be9f1 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 5 Sep 2025 12:32:20 +0000 Subject: [PATCH 02/31] Container was missing --- .github/workflows/benchmark_v2_caller.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml index 521cea5a0f3a..a3bd4e97e712 100644 --- a/.github/workflows/benchmark_v2_caller.yml +++ b/.github/workflows/benchmark_v2_caller.yml @@ -51,6 +51,15 @@ jobs: benchmark-v2-default: name: Benchmark v2 - Default Models uses: ./.github/workflows/benchmark_v2.yml + strategy: + matrix: + # group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus] (A100 runner is not enabled) + group: [aws-g5-4xlarge-cache] + runs-on: + group: ${{ matrix.group }} + container: + image: huggingface/transformers-pytorch-gpu + options: --gpus all --privileged --ipc host with: model_id: ${{ inputs.model_id || '' }} warmup_iterations: ${{ inputs.warmup_iterations || 3 }} From 09f1dc59a5b2263a4dac8a7c80e8b3256e872c53 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 5 Sep 2025 12:59:32 +0000 Subject: [PATCH 03/31] Change to sandbox branch name --- .github/workflows/benchmark_v2_caller.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml index a3bd4e97e712..bf8cd29a8468 100644 --- a/.github/workflows/benchmark_v2_caller.yml +++ b/.github/workflows/benchmark_v2_caller.yml @@ -6,7 +6,7 @@ on: - cron: "30 2 * * *" push: branches: - - run_nvidia_benchmark* + - benchmarking-gh-actions* workflow_dispatch: inputs: model_id: From e1a62296f78e3a926ce87bc5b539cc2a7145990a Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 5 Sep 2025 13:08:11 +0000 Subject: [PATCH 04/31] Wrong place for image name --- .github/workflows/benchmark_v2_caller.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml index bf8cd29a8468..332737d674bc 100644 --- a/.github/workflows/benchmark_v2_caller.yml +++ b/.github/workflows/benchmark_v2_caller.yml @@ -51,15 +51,6 @@ jobs: benchmark-v2-default: name: Benchmark v2 - Default Models uses: ./.github/workflows/benchmark_v2.yml - strategy: - matrix: - # group: [aws-g5-4xlarge-cache, aws-p4d-24xlarge-plus] (A100 runner is not enabled) - group: [aws-g5-4xlarge-cache] - runs-on: - group: ${{ matrix.group }} - container: - image: huggingface/transformers-pytorch-gpu - options: --gpus all --privileged --ipc host with: model_id: ${{ inputs.model_id || '' }} warmup_iterations: ${{ inputs.warmup_iterations || 3 }} From 03b360900e80982050d5a6d7e7cfdc7b98ea4844 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 5 Sep 2025 13:09:01 +0000 Subject: [PATCH 05/31] Variable declarations --- .github/workflows/benchmark_v2_caller.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml index 332737d674bc..b0b36a497cb4 100644 --- a/.github/workflows/benchmark_v2_caller.yml +++ b/.github/workflows/benchmark_v2_caller.yml @@ -56,9 +56,6 @@ jobs: warmup_iterations: ${{ inputs.warmup_iterations || 3 }} measurement_iterations: ${{ inputs.measurement_iterations || 5 }} num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }} - include_benchmarks: ${{ inputs.include_benchmarks || '' }} - exclude_benchmarks: ${{ inputs.exclude_benchmarks || '' }} enable_file_logging: ${{ inputs.enable_file_logging || false }} - slack_report_channel: '#transformers-ci-benchmark-v2' commit_sha: ${{ github.sha }} secrets: inherit \ No newline at end of file From 52393e38904fc95fc5b884f05c72ae1228bab12d Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 5 Sep 2025 13:11:02 +0000 Subject: [PATCH 06/31] Remove references to file logging --- .github/workflows/benchmark_v2_caller.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml index b0b36a497cb4..9d359f9f57dc 100644 --- a/.github/workflows/benchmark_v2_caller.yml +++ b/.github/workflows/benchmark_v2_caller.yml @@ -39,11 +39,6 @@ on: required: false type: string default: '' - enable_file_logging: - description: 'Enable file logging' - required: false - type: boolean - default: false repository_dispatch: types: [benchmark_v2_trigger] @@ -56,6 +51,5 @@ jobs: warmup_iterations: ${{ inputs.warmup_iterations || 3 }} measurement_iterations: ${{ inputs.measurement_iterations || 5 }} num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }} - enable_file_logging: ${{ inputs.enable_file_logging || false }} commit_sha: ${{ github.sha }} secrets: inherit \ No newline at end of file From 78ff33d3b4a1e9a83308aaab99762232f949f3e1 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 5 Sep 2025 13:20:22 +0000 Subject: [PATCH 07/31] Remove unnecessary step --- .github/workflows/benchmark_v2.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 6d10147df27a..f21dd4ad5540 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -54,11 +54,6 @@ jobs: with: ref: ${{ inputs.commit_sha || github.sha }} - - name: Update clone - if: inputs.commit_sha - run: | - git fetch && git checkout ${{ inputs.commit_sha }} - - name: Install benchmark dependencies working-directory: benchmark_v2 run: | From 57e3cdac97cc19574722d6b1bd09e2a1bfdd3c80 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 5 Sep 2025 13:31:50 +0000 Subject: [PATCH 08/31] Fix deps install --- .github/workflows/benchmark_v2.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index f21dd4ad5540..afd9c9e51b24 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -55,9 +55,8 @@ jobs: ref: ${{ inputs.commit_sha || github.sha }} - name: Install benchmark dependencies - working-directory: benchmark_v2 run: | - python3 -m pip install -r requirements.txt + run: python3 -m pip install -r benchmark_v2/requirements.txt - name: Reinstall transformers in edit mode run: | @@ -98,9 +97,8 @@ jobs: echo "Benchmark arguments: $args" - name: Run benchmark v2 - working-directory: benchmark_v2 run: | echo "Running benchmark with args: ${{ steps.prepare-args.outputs.benchmark_args }}" - python3 run_benchmarks.py ${{ steps.prepare-args.outputs.benchmark_args }} + python3 benchmark_v2/run_benchmarks.py ${{ steps.prepare-args.outputs.benchmark_args }} env: HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} \ No newline at end of file From 1deb38e84d590a06d5eace9f3d04c0ba3d3bea73 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 5 Sep 2025 13:40:01 +0000 Subject: [PATCH 09/31] Syntax --- .github/workflows/benchmark_v2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index afd9c9e51b24..4c0dfd4d825e 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -56,7 +56,7 @@ jobs: - name: Install benchmark dependencies run: | - run: python3 -m pip install -r benchmark_v2/requirements.txt + python3 -m pip install -r benchmark_v2/requirements.txt - name: Reinstall transformers in edit mode run: | From 059d740897a0a70800d771f3647d0484baf55411 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 5 Sep 2025 14:08:11 +0000 Subject: [PATCH 10/31] Add workdir --- .github/workflows/benchmark_v2.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 4c0dfd4d825e..8de64b8fcfc4 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -97,8 +97,9 @@ jobs: echo "Benchmark arguments: $args" - name: Run benchmark v2 + working-directory: benchmark_v2 run: | echo "Running benchmark with args: ${{ steps.prepare-args.outputs.benchmark_args }}" - python3 benchmark_v2/run_benchmarks.py ${{ steps.prepare-args.outputs.benchmark_args }} + python3 run_benchmarks.py ${{ steps.prepare-args.outputs.benchmark_args }} env: HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} \ No newline at end of file From e6c45b6195173f22d028d9ff34e460b5361c3e49 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sat, 6 Sep 2025 18:12:17 +0000 Subject: [PATCH 11/31] Add upload feature --- .github/workflows/benchmark_v2.yml | 24 ++++ .github/workflows/benchmark_v2_caller.yml | 3 + benchmark_v2/README.md | 30 ++++ benchmark_v2/requirements.txt | 3 +- benchmark_v2/run_benchmarks.py | 163 ++++++++++++++++++++-- 5 files changed, 210 insertions(+), 13 deletions(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 8de64b8fcfc4..5edc24f853e2 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -28,6 +28,21 @@ on: required: false type: string default: '' + upload_to_hf: + description: 'HuggingFace Dataset to upload results to (e.g., "org/benchmark-results")' + required: false + type: string + default: '' + run_id: + description: 'Custom run ID for organizing results (auto-generated if not provided)' + required: false + type: string + default: '' + benchmark_repo_id: + description: 'HuggingFace Dataset to upload results to (e.g., "org/benchmark-results")' + required: false + type: string + default: '' env: HF_HOME: /mnt/cache @@ -93,6 +108,15 @@ jobs: args="$args --commit-id '${{ github.sha }}'" fi + # Add HuggingFace upload parameters if specified + if [ -n "${{ inputs.upload_to_hf }}" ]; then + args="$args --upload-to-hf '${{ inputs.upload_to_hf }}'" + fi + + if [ -n "${{ inputs.run_id }}" ]; then + args="$args --run-id '${{ inputs.run_id }}'" + fi + echo "benchmark_args=$args" >> $GITHUB_OUTPUT echo "Benchmark arguments: $args" diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml index 9d359f9f57dc..615a29bdecde 100644 --- a/.github/workflows/benchmark_v2_caller.yml +++ b/.github/workflows/benchmark_v2_caller.yml @@ -52,4 +52,7 @@ jobs: measurement_iterations: ${{ inputs.measurement_iterations || 5 }} num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }} commit_sha: ${{ github.sha }} + upload_to_hf: true + run_id: ${{ github.run_id }} + benchmark_repo_id: ahadnagy/transformers-benchmarks secrets: inherit \ No newline at end of file diff --git a/benchmark_v2/README.md b/benchmark_v2/README.md index 9a0102b387fc..1d34de6408c7 100644 --- a/benchmark_v2/README.md +++ b/benchmark_v2/README.md @@ -21,6 +21,36 @@ python run_benchmarks.py \ --num-tokens-to-generate 200 ``` +### Uploading Results to HuggingFace Dataset + +You can automatically upload benchmark results to a HuggingFace Dataset for tracking and analysis: + +```bash +# Upload to a public dataset with auto-generated run ID +python run_benchmarks.py --upload-to-hf username/benchmark-results + +# Upload with a custom run ID for easy identification +python run_benchmarks.py --upload-to-hf username/benchmark-results --run-id experiment_v1 +``` + +**Dataset Directory Structure:** +``` +dataset_name/ +├── 2025-01-15/ +│ ├── runs/ # Non-scheduled runs (manual, PR, etc.) +│ │ └── 123-1245151651/ # GitHub run number and ID +│ │ └── benchmark_results/ +│ │ ├── benchmark_summary_20250115_143022.json +│ │ └── model-name/ +│ │ └── model-name_benchmark_20250115_143022.json +│ └── benchmark_results_abc123de/ # Scheduled runs (daily CI) +│ ├── benchmark_summary_20250115_143022.json +│ └── model-name/ +│ └── model-name_benchmark_20250115_143022.json +└── 2025-01-16/ + └── ... +``` + ### Running Specific Benchmarks ```bash diff --git a/benchmark_v2/requirements.txt b/benchmark_v2/requirements.txt index a7a435958cf7..e4dcbb3eb7ef 100644 --- a/benchmark_v2/requirements.txt +++ b/benchmark_v2/requirements.txt @@ -3,4 +3,5 @@ psutil>=5.8.0 gpustat>=1.0.0 torch>=2.0.0 transformers>=4.30.0 -datasets>=2.10.0 \ No newline at end of file +datasets>=2.10.0 +huggingface_hub>=0.16.0 \ No newline at end of file diff --git a/benchmark_v2/run_benchmarks.py b/benchmark_v2/run_benchmarks.py index 9a147b5dde6e..12b6ecd4a23a 100755 --- a/benchmark_v2/run_benchmarks.py +++ b/benchmark_v2/run_benchmarks.py @@ -24,6 +24,7 @@ import os import sys import json +import uuid from datetime import datetime from pathlib import Path from typing import Dict, List, Any, Optional @@ -94,6 +95,9 @@ def discover_benchmarks(benches_dir: str) -> List[Dict[str, Any]]: else: logging.warning(f"No runner function found in {py_file}") + except ImportError as e: + logging.error(f"Failed to import {py_file}: Missing dependency - {e}") + logging.error(f"Make sure to install: pip install -r requirements.txt") except Exception as e: logging.error(f"Failed to import {py_file}: {e}") @@ -168,7 +172,8 @@ def run_single_benchmark( def generate_summary_report( output_dir: str, benchmark_results: Dict[str, Any], - logger: logging.Logger + logger: logging.Logger, + benchmark_run_uuid: Optional[str] = None ) -> str: """Generate a summary report of all benchmark runs.""" timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") @@ -177,6 +182,7 @@ def generate_summary_report( summary_data = { "run_metadata": { "timestamp": datetime.utcnow().isoformat(), + "benchmark_run_uuid": benchmark_run_uuid, "total_benchmarks": len(benchmark_results), "successful_benchmarks": len([r for r in benchmark_results.values() if r is not None]), "failed_benchmarks": len([r for r in benchmark_results.values() if r is None]) @@ -192,10 +198,111 @@ def generate_summary_report( return summary_file +def upload_results_to_hf_dataset( + output_dir: str, + summary_file: str, + dataset_name: str, + run_id: Optional[str] = None, + logger: logging.Logger = None +) -> Optional[str]: + """ + Upload benchmark results to a HuggingFace Dataset. + Based on upload_collated_report() from utils/collated_reports.py + + Args: + output_dir: Local output directory containing results + summary_file: Path to the summary file + dataset_name: Name of the HuggingFace dataset to upload to + run_id: Unique run identifier (if None, will generate one) + logger: Logger instance + + Returns: + The run_id used for the upload, None if upload failed + """ + if logger is None: + logger = logging.getLogger(__name__) + + import os + from huggingface_hub import HfApi + + api = HfApi() + + if run_id is None: + github_run_number = os.getenv("GITHUB_RUN_NUMBER") + github_run_id = os.getenv("GITHUB_RUN_ID") + if github_run_number and github_run_id: + run_id = f"{github_run_number}-{github_run_id}" + + date_folder = datetime.now().strftime("%Y-%m-%d") + + github_event_name = os.getenv("GITHUB_EVENT_NAME") + if github_event_name != "schedule": + # Non-scheduled runs go under a runs subfolder + repo_path = f"{date_folder}/runs/{run_id}/benchmark_results" + else: + # Scheduled runs go directly under the date + repo_path = f"{date_folder}/{run_id}/benchmark_results" + + logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'") + + try: + # Get the authentication token (prioritize specific token, fallback to HF_TOKEN) + token = os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN") or os.getenv("HF_TOKEN") + + # Upload all files in the output directory + from pathlib import Path + output_path = Path(output_dir) + + for file_path in output_path.rglob("*"): + if file_path.is_file(): + # Calculate relative path from output_dir + relative_path = file_path.relative_to(output_path) + path_in_repo = f"{repo_path}/{relative_path}" + + logger.debug(f"Uploading {file_path} to {path_in_repo}") + + api.upload_file( + path_or_fileobj=str(file_path), + path_in_repo=path_in_repo, + repo_id=dataset_name, + repo_type="dataset", + token=token, + commit_message=f"Upload benchmark results for run {run_id}" + ) + + logger.info(f"Successfully uploaded results to: https://huggingface.co/datasets/{dataset_name}/tree/main/{repo_path}") + + return run_id + + except Exception as upload_error: + logger.error(f"Failed to upload results: {upload_error}") + import traceback + logger.debug(traceback.format_exc()) + return None + + def main(): """Main entry point for the benchmarking script.""" + # Generate a unique UUID for this benchmark run + benchmark_run_uuid = str(uuid.uuid4())[:8] + parser = argparse.ArgumentParser( - description="Run all benchmarks in the ./benches directory" + description="Run all benchmarks in the ./benches directory", + epilog=""" +Examples: + # Run all available benchmarks + python3 run_benchmarks.py + + # Run with specific model and upload to HuggingFace Dataset + python3 run_benchmarks.py --model-id meta-llama/Llama-2-7b-hf --upload-to-hf username/benchmark-results + + # Run with custom run ID and upload to HuggingFace Dataset + python3 run_benchmarks.py --run-id experiment_v1 --upload-to-hf org/benchmarks + + # Run only specific benchmarks with file logging + python3 run_benchmarks.py --include llama --enable-file-logging + """, + formatter_class=argparse.RawDescriptionHelpFormatter ) parser.add_argument( @@ -261,12 +368,6 @@ def main(): help="Exclude benchmarks matching these names" ) - parser.add_argument( - "--enable-mock", - action="store_true", - help="Enable mock benchmark (skipped by default)" - ) - parser.add_argument( "--enable-file-logging", action="store_true", @@ -279,12 +380,25 @@ def main(): help="Git commit ID for metadata (if not provided, will auto-detect from git)" ) + parser.add_argument( + "--upload-to-hub", + type=str, + help="Upload results to HuggingFace Dataset (provide dataset name, e.g., 'username/benchmark-results')" + ) + + parser.add_argument( + "--run-id", + type=str, + help="Custom run ID for organizing results (if not provided, will generate a unique ID)" + ) + args = parser.parse_args() # Setup logging logger = setup_logging(args.log_level, args.enable_file_logging) logger.info("Starting benchmark discovery and execution") + logger.info(f"Benchmark run UUID: {benchmark_run_uuid}") logger.info(f"Output directory: {args.output_dir}") logger.info(f"Benches directory: {args.benches_dir}") @@ -327,9 +441,6 @@ def main(): if args.model_id: benchmark_kwargs['model_id'] = args.model_id - # Add enable_mock flag for mock benchmark - benchmark_kwargs['enable_mock'] = args.enable_mock - # Add commit_id if provided if args.commit_id: benchmark_kwargs['commit_id'] = args.commit_id @@ -352,7 +463,27 @@ def main(): successful_count += 1 # Generate summary report - summary_file = generate_summary_report(args.output_dir, benchmark_results, logger) + summary_file = generate_summary_report(args.output_dir, benchmark_results, logger, benchmark_run_uuid) + + # Upload results to HuggingFace Dataset if requested + upload_run_id = None + if args.upload_to_hf: + logger.info("=" * 60) + logger.info("UPLOADING TO HUGGINGFACE DATASET") + logger.info("=" * 60) + # Use provided run_id or fallback to benchmark run UUID + effective_run_id = args.run_id or benchmark_run_uuid + upload_run_id = upload_results_to_hf_dataset( + output_dir=args.output_dir, + summary_file=summary_file, + dataset_name=args.upload_to_hf, + run_id=effective_run_id, + logger=logger + ) + if upload_run_id: + logger.info(f"Upload completed with run ID: {upload_run_id}") + else: + logger.warning("Upload failed - continuing with local results") # Final summary total_benchmarks = len(filtered_benchmarks) @@ -367,6 +498,14 @@ def main(): logger.info(f"Output directory: {args.output_dir}") logger.info(f"Summary report: {summary_file}") + if args.upload_to_hf: + if upload_run_id: + logger.info(f"HuggingFace Dataset: {args.upload_to_hf}") + logger.info(f"Run ID: {upload_run_id}") + logger.info(f"View results: https://huggingface.co/datasets/{args.upload_to_hf}/tree/main/{datetime.now().strftime('%Y-%m-%d')}/runs/{upload_run_id}") + else: + logger.warning("Upload to HuggingFace Dataset failed") + if failed_count > 0: logger.warning(f"{failed_count} benchmark(s) failed. Check logs for details.") return 1 From a6a29249e0db9b03ab4ccc8ee6876a180b5d3c8c Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sat, 6 Sep 2025 18:21:04 +0000 Subject: [PATCH 12/31] typo --- .github/workflows/benchmark_v2.yml | 6 +++--- .github/workflows/benchmark_v2_caller.yml | 2 +- benchmark_v2/run_benchmarks.py | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 5edc24f853e2..54a11704f74b 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -28,7 +28,7 @@ on: required: false type: string default: '' - upload_to_hf: + upload_to_hub: description: 'HuggingFace Dataset to upload results to (e.g., "org/benchmark-results")' required: false type: string @@ -109,8 +109,8 @@ jobs: fi # Add HuggingFace upload parameters if specified - if [ -n "${{ inputs.upload_to_hf }}" ]; then - args="$args --upload-to-hf '${{ inputs.upload_to_hf }}'" + if [ -n "${{ inputs.upload_to_hub }}" ]; then + args="$args --upload-to-hub '${{ inputs.upload_to_hub }}'" fi if [ -n "${{ inputs.run_id }}" ]; then diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml index 615a29bdecde..8fdfc0fac3c3 100644 --- a/.github/workflows/benchmark_v2_caller.yml +++ b/.github/workflows/benchmark_v2_caller.yml @@ -52,7 +52,7 @@ jobs: measurement_iterations: ${{ inputs.measurement_iterations || 5 }} num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }} commit_sha: ${{ github.sha }} - upload_to_hf: true + upload_to_hub: true run_id: ${{ github.run_id }} benchmark_repo_id: ahadnagy/transformers-benchmarks secrets: inherit \ No newline at end of file diff --git a/benchmark_v2/run_benchmarks.py b/benchmark_v2/run_benchmarks.py index 12b6ecd4a23a..477fbdeaf43d 100755 --- a/benchmark_v2/run_benchmarks.py +++ b/benchmark_v2/run_benchmarks.py @@ -467,7 +467,7 @@ def main(): # Upload results to HuggingFace Dataset if requested upload_run_id = None - if args.upload_to_hf: + if args.upload_to_hub: logger.info("=" * 60) logger.info("UPLOADING TO HUGGINGFACE DATASET") logger.info("=" * 60) @@ -476,7 +476,7 @@ def main(): upload_run_id = upload_results_to_hf_dataset( output_dir=args.output_dir, summary_file=summary_file, - dataset_name=args.upload_to_hf, + dataset_name=args.upload_to_hub, run_id=effective_run_id, logger=logger ) @@ -498,11 +498,11 @@ def main(): logger.info(f"Output directory: {args.output_dir}") logger.info(f"Summary report: {summary_file}") - if args.upload_to_hf: + if args.upload_to_hub: if upload_run_id: - logger.info(f"HuggingFace Dataset: {args.upload_to_hf}") + logger.info(f"HuggingFace Dataset: {args.upload_to_hub}") logger.info(f"Run ID: {upload_run_id}") - logger.info(f"View results: https://huggingface.co/datasets/{args.upload_to_hf}/tree/main/{datetime.now().strftime('%Y-%m-%d')}/runs/{upload_run_id}") + logger.info(f"View results: https://huggingface.co/datasets/{args.upload_to_hub}/tree/main/{datetime.now().strftime('%Y-%m-%d')}/runs/{upload_run_id}") else: logger.warning("Upload to HuggingFace Dataset failed") From e72be0ec356de56d8f48a04df7ee4a447de72d4d Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sat, 6 Sep 2025 18:26:45 +0000 Subject: [PATCH 13/31] No need for hf_transfer --- benchmark_v2/benches/llama.py | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmark_v2/benches/llama.py b/benchmark_v2/benches/llama.py index 7075f5834c05..006ffa5e0f9e 100644 --- a/benchmark_v2/benches/llama.py +++ b/benchmark_v2/benches/llama.py @@ -20,7 +20,6 @@ import torch -os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" os.environ["TOKENIZERS_PARALLELISM"] = "1" torch.set_float32_matmul_precision("high") From fdc430113fc21c7a4fe6d866b950486ddddcf6ba Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sat, 6 Sep 2025 18:42:05 +0000 Subject: [PATCH 14/31] Pass in runner --- .github/workflows/benchmark_v2.yml | 16 +++++++++++----- .github/workflows/benchmark_v2_caller.yml | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 54a11704f74b..c023aafff0e7 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -3,6 +3,15 @@ name: Benchmark v2 Framework on: workflow_call: inputs: + runner_group: + description: 'GH Actions runner group to use' + required: false + type: string + runner_labels: + description: 'GH Actions runner label to use' + required: false + type: string + default: null model_id: description: 'Model ID to benchmark (e.g., meta-llama/Llama-2-7b-hf)' required: false @@ -54,12 +63,9 @@ env: jobs: benchmark-v2: name: Benchmark v2 - strategy: - matrix: - # Use GPU-enabled runners for accurate benchmarking - group: [aws-g5-4xlarge-cache] runs-on: - group: ${{ matrix.group }} + group: ${{ inputs.runner_group }} + labels: ${{ inputs.runner_labels }} container: image: huggingface/transformers-pytorch-gpu options: --gpus all --privileged --ipc host --shm-size "16gb" diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml index 8fdfc0fac3c3..e7a074bd9fd7 100644 --- a/.github/workflows/benchmark_v2_caller.yml +++ b/.github/workflows/benchmark_v2_caller.yml @@ -12,7 +12,6 @@ on: model_id: description: 'Model ID to benchmark (leave empty for default models)' required: false - type: string default: '' warmup_iterations: description: 'Number of warmup iterations' @@ -47,6 +46,7 @@ jobs: name: Benchmark v2 - Default Models uses: ./.github/workflows/benchmark_v2.yml with: + runner_group: "aws-g5-4xlarge-cache" model_id: ${{ inputs.model_id || '' }} warmup_iterations: ${{ inputs.warmup_iterations || 3 }} measurement_iterations: ${{ inputs.measurement_iterations || 5 }} From 5bcba35cc03d366452f3f095e1807a0ff6e23cfa Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sat, 6 Sep 2025 18:59:24 +0000 Subject: [PATCH 15/31] Runner config --- .github/workflows/benchmark_v2.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index c023aafff0e7..701fa0a80c1d 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -10,7 +10,6 @@ on: runner_labels: description: 'GH Actions runner label to use' required: false - type: string default: null model_id: description: 'Model ID to benchmark (e.g., meta-llama/Llama-2-7b-hf)' From 6bb52ed959f30f77a6810bad6e0bd35756251164 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sat, 6 Sep 2025 19:00:27 +0000 Subject: [PATCH 16/31] Runner config --- .github/workflows/benchmark_v2.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 701fa0a80c1d..c023aafff0e7 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -10,6 +10,7 @@ on: runner_labels: description: 'GH Actions runner label to use' required: false + type: string default: null model_id: description: 'Model ID to benchmark (e.g., meta-llama/Llama-2-7b-hf)' From 4bf4a815baad6bd68dce1e000198afb9e5f0f228 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sat, 6 Sep 2025 19:01:14 +0000 Subject: [PATCH 17/31] Runner config --- .github/workflows/benchmark_v2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index c023aafff0e7..5e3bad4d55a8 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -65,7 +65,7 @@ jobs: name: Benchmark v2 runs-on: group: ${{ inputs.runner_group }} - labels: ${{ inputs.runner_labels }} + labels: none container: image: huggingface/transformers-pytorch-gpu options: --gpus all --privileged --ipc host --shm-size "16gb" From 02bf83a6bc2aa2e37ddd8faf63f895d79faa438f Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sat, 6 Sep 2025 19:06:05 +0000 Subject: [PATCH 18/31] Runner config --- .github/workflows/benchmark_v2.yml | 13 +++---------- .github/workflows/benchmark_v2_caller.yml | 3 +-- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 5e3bad4d55a8..2cc80c1218c6 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -3,15 +3,10 @@ name: Benchmark v2 Framework on: workflow_call: inputs: - runner_group: + runner: description: 'GH Actions runner group to use' - required: false - type: string - runner_labels: - description: 'GH Actions runner label to use' - required: false + required: true type: string - default: null model_id: description: 'Model ID to benchmark (e.g., meta-llama/Llama-2-7b-hf)' required: false @@ -63,9 +58,7 @@ env: jobs: benchmark-v2: name: Benchmark v2 - runs-on: - group: ${{ inputs.runner_group }} - labels: none + runs-on: ${{ inputs.runner }} container: image: huggingface/transformers-pytorch-gpu options: --gpus all --privileged --ipc host --shm-size "16gb" diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml index e7a074bd9fd7..5564d9c0dde5 100644 --- a/.github/workflows/benchmark_v2_caller.yml +++ b/.github/workflows/benchmark_v2_caller.yml @@ -46,8 +46,7 @@ jobs: name: Benchmark v2 - Default Models uses: ./.github/workflows/benchmark_v2.yml with: - runner_group: "aws-g5-4xlarge-cache" - model_id: ${{ inputs.model_id || '' }} + runner: "aws-g5-4xlarge-cache" warmup_iterations: ${{ inputs.warmup_iterations || 3 }} measurement_iterations: ${{ inputs.measurement_iterations || 5 }} num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }} From 8fb84632dfcb88e4f9bc503eb80469b03dd5d79d Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sat, 6 Sep 2025 19:09:10 +0000 Subject: [PATCH 19/31] Runner config --- .github/workflows/benchmark_v2_caller.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_caller.yml index 5564d9c0dde5..6eaa4d5e5cb9 100644 --- a/.github/workflows/benchmark_v2_caller.yml +++ b/.github/workflows/benchmark_v2_caller.yml @@ -46,7 +46,7 @@ jobs: name: Benchmark v2 - Default Models uses: ./.github/workflows/benchmark_v2.yml with: - runner: "aws-g5-4xlarge-cache" + runner: aws-g5-4xlarge-cache-use1-public-80 warmup_iterations: ${{ inputs.warmup_iterations || 3 }} measurement_iterations: ${{ inputs.measurement_iterations || 5 }} num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }} From 904cab046ecbc9cd73f3a130950a87d7272e5a8f Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sun, 7 Sep 2025 10:33:11 +0000 Subject: [PATCH 20/31] mi325 caller --- ...caller.yml => benchmark_v2_a10_caller.yml} | 0 .../workflows/benchmark_v2_mi325_caller.yml | 57 +++++++++++++++++++ 2 files changed, 57 insertions(+) rename .github/workflows/{benchmark_v2_caller.yml => benchmark_v2_a10_caller.yml} (100%) create mode 100644 .github/workflows/benchmark_v2_mi325_caller.yml diff --git a/.github/workflows/benchmark_v2_caller.yml b/.github/workflows/benchmark_v2_a10_caller.yml similarity index 100% rename from .github/workflows/benchmark_v2_caller.yml rename to .github/workflows/benchmark_v2_a10_caller.yml diff --git a/.github/workflows/benchmark_v2_mi325_caller.yml b/.github/workflows/benchmark_v2_mi325_caller.yml new file mode 100644 index 000000000000..7124f9545269 --- /dev/null +++ b/.github/workflows/benchmark_v2_mi325_caller.yml @@ -0,0 +1,57 @@ +name: Benchmark v2 Scheduled Runner + +on: + schedule: + # Run daily at 2:30 AM UTC + - cron: "30 2 * * *" + push: + branches: + - benchmarking-gh-actions* + workflow_dispatch: + inputs: + model_id: + description: 'Model ID to benchmark (leave empty for default models)' + required: false + default: '' + warmup_iterations: + description: 'Number of warmup iterations' + required: false + type: number + default: 3 + measurement_iterations: + description: 'Number of measurement iterations' + required: false + type: number + default: 5 + num_tokens_to_generate: + description: 'Number of tokens to generate' + required: false + type: number + default: 100 + include_benchmarks: + description: 'Benchmarks to include (comma-separated, e.g., "llama")' + required: false + type: string + default: '' + exclude_benchmarks: + description: 'Benchmarks to exclude (comma-separated)' + required: false + type: string + default: '' + repository_dispatch: + types: [benchmark_v2_trigger] + +jobs: + benchmark-v2-default: + name: Benchmark v2 - Default Models + uses: ./.github/workflows/benchmark_v2.yml + with: + runner: amd-mi325-ci-1gpu + warmup_iterations: ${{ inputs.warmup_iterations || 3 }} + measurement_iterations: ${{ inputs.measurement_iterations || 5 }} + num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }} + commit_sha: ${{ github.sha }} + upload_to_hub: true + run_id: ${{ github.run_id }} + benchmark_repo_id: ahadnagy/transformers-benchmarks + secrets: inherit \ No newline at end of file From 0834e285fd376dbcdd20ce1dcd3f303537f28e13 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sun, 7 Sep 2025 10:36:47 +0000 Subject: [PATCH 21/31] Name workflow runs properly --- .github/workflows/benchmark_v2_a10_caller.yml | 2 +- .github/workflows/benchmark_v2_mi325_caller.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmark_v2_a10_caller.yml b/.github/workflows/benchmark_v2_a10_caller.yml index 6eaa4d5e5cb9..840d72994d5a 100644 --- a/.github/workflows/benchmark_v2_a10_caller.yml +++ b/.github/workflows/benchmark_v2_a10_caller.yml @@ -1,4 +1,4 @@ -name: Benchmark v2 Scheduled Runner +name: Benchmark v2 Scheduled Runner - MI325 Single-GPU on: schedule: diff --git a/.github/workflows/benchmark_v2_mi325_caller.yml b/.github/workflows/benchmark_v2_mi325_caller.yml index 7124f9545269..affea601b5ba 100644 --- a/.github/workflows/benchmark_v2_mi325_caller.yml +++ b/.github/workflows/benchmark_v2_mi325_caller.yml @@ -1,4 +1,4 @@ -name: Benchmark v2 Scheduled Runner +name: Benchmark v2 Scheduled Runner - MI325 Single-GPU on: schedule: From 34162896210da99b05cdaf8ca00ad61314b6a11f Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Sun, 7 Sep 2025 11:47:10 +0000 Subject: [PATCH 22/31] Copy-paste error --- .github/workflows/benchmark_v2_a10_caller.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark_v2_a10_caller.yml b/.github/workflows/benchmark_v2_a10_caller.yml index 840d72994d5a..9754c4fa7c9b 100644 --- a/.github/workflows/benchmark_v2_a10_caller.yml +++ b/.github/workflows/benchmark_v2_a10_caller.yml @@ -1,4 +1,4 @@ -name: Benchmark v2 Scheduled Runner - MI325 Single-GPU +name: Benchmark v2 Scheduled Runner - A10 Single-GPU on: schedule: From 6ef320970699919d75d4738c89eafdea12267c2f Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Mon, 8 Sep 2025 09:12:15 +0000 Subject: [PATCH 23/31] Add final repo IDs and schedule --- .github/workflows/benchmark_v2_a10_caller.yml | 8 ++++---- .github/workflows/benchmark_v2_mi325_caller.yml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/benchmark_v2_a10_caller.yml b/.github/workflows/benchmark_v2_a10_caller.yml index 9754c4fa7c9b..6b55642b19a0 100644 --- a/.github/workflows/benchmark_v2_a10_caller.yml +++ b/.github/workflows/benchmark_v2_a10_caller.yml @@ -2,11 +2,11 @@ name: Benchmark v2 Scheduled Runner - A10 Single-GPU on: schedule: - # Run daily at 2:30 AM UTC - - cron: "30 2 * * *" + # Run daily at 16:30 UTC + - cron: "30 16 * * *" push: branches: - - benchmarking-gh-actions* + - run-benchmarking-gh-actions* workflow_dispatch: inputs: model_id: @@ -53,5 +53,5 @@ jobs: commit_sha: ${{ github.sha }} upload_to_hub: true run_id: ${{ github.run_id }} - benchmark_repo_id: ahadnagy/transformers-benchmarks + benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks secrets: inherit \ No newline at end of file diff --git a/.github/workflows/benchmark_v2_mi325_caller.yml b/.github/workflows/benchmark_v2_mi325_caller.yml index affea601b5ba..c0d58b628567 100644 --- a/.github/workflows/benchmark_v2_mi325_caller.yml +++ b/.github/workflows/benchmark_v2_mi325_caller.yml @@ -2,11 +2,11 @@ name: Benchmark v2 Scheduled Runner - MI325 Single-GPU on: schedule: - # Run daily at 2:30 AM UTC - - cron: "30 2 * * *" + # Run daily at 16:30 UTC + - cron: "30 16 * * *" push: branches: - - benchmarking-gh-actions* + - run-benchmarking-gh-actions* workflow_dispatch: inputs: model_id: @@ -53,5 +53,5 @@ jobs: commit_sha: ${{ github.sha }} upload_to_hub: true run_id: ${{ github.run_id }} - benchmark_repo_id: ahadnagy/transformers-benchmarks + benchmark_repo_id: optimum-amd/transformers-daily-benchmarks secrets: inherit \ No newline at end of file From 16bee68afc59a6254556d33c18c8fd9151dda57c Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Tue, 9 Sep 2025 18:00:19 +0000 Subject: [PATCH 24/31] Review comments --- .github/workflows/benchmark_v2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 2cc80c1218c6..9d66e68b028b 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -33,7 +33,7 @@ on: type: string default: '' upload_to_hub: - description: 'HuggingFace Dataset to upload results to (e.g., "org/benchmark-results")' + description: 'Enable/disable uploading results to a HuggingFace Dataset' required: false type: string default: '' From f5151a481e8376be90047f5f1b42e2521df52f33 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Wed, 10 Sep 2025 16:22:14 +0000 Subject: [PATCH 25/31] Remove wf params --- .github/workflows/benchmark_v2_a10_caller.yml | 39 ++----------------- .../workflows/benchmark_v2_mi325_caller.yml | 39 ++----------------- 2 files changed, 6 insertions(+), 72 deletions(-) diff --git a/.github/workflows/benchmark_v2_a10_caller.yml b/.github/workflows/benchmark_v2_a10_caller.yml index 6b55642b19a0..d1f220bc2185 100644 --- a/.github/workflows/benchmark_v2_a10_caller.yml +++ b/.github/workflows/benchmark_v2_a10_caller.yml @@ -7,39 +7,6 @@ on: push: branches: - run-benchmarking-gh-actions* - workflow_dispatch: - inputs: - model_id: - description: 'Model ID to benchmark (leave empty for default models)' - required: false - default: '' - warmup_iterations: - description: 'Number of warmup iterations' - required: false - type: number - default: 3 - measurement_iterations: - description: 'Number of measurement iterations' - required: false - type: number - default: 5 - num_tokens_to_generate: - description: 'Number of tokens to generate' - required: false - type: number - default: 100 - include_benchmarks: - description: 'Benchmarks to include (comma-separated, e.g., "llama")' - required: false - type: string - default: '' - exclude_benchmarks: - description: 'Benchmarks to exclude (comma-separated)' - required: false - type: string - default: '' - repository_dispatch: - types: [benchmark_v2_trigger] jobs: benchmark-v2-default: @@ -47,9 +14,9 @@ jobs: uses: ./.github/workflows/benchmark_v2.yml with: runner: aws-g5-4xlarge-cache-use1-public-80 - warmup_iterations: ${{ inputs.warmup_iterations || 3 }} - measurement_iterations: ${{ inputs.measurement_iterations || 5 }} - num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }} + warmup_iterations: 3 + measurement_iterations: 5 + num_tokens_to_generate: 100 commit_sha: ${{ github.sha }} upload_to_hub: true run_id: ${{ github.run_id }} diff --git a/.github/workflows/benchmark_v2_mi325_caller.yml b/.github/workflows/benchmark_v2_mi325_caller.yml index c0d58b628567..9476f7bc071e 100644 --- a/.github/workflows/benchmark_v2_mi325_caller.yml +++ b/.github/workflows/benchmark_v2_mi325_caller.yml @@ -7,39 +7,6 @@ on: push: branches: - run-benchmarking-gh-actions* - workflow_dispatch: - inputs: - model_id: - description: 'Model ID to benchmark (leave empty for default models)' - required: false - default: '' - warmup_iterations: - description: 'Number of warmup iterations' - required: false - type: number - default: 3 - measurement_iterations: - description: 'Number of measurement iterations' - required: false - type: number - default: 5 - num_tokens_to_generate: - description: 'Number of tokens to generate' - required: false - type: number - default: 100 - include_benchmarks: - description: 'Benchmarks to include (comma-separated, e.g., "llama")' - required: false - type: string - default: '' - exclude_benchmarks: - description: 'Benchmarks to exclude (comma-separated)' - required: false - type: string - default: '' - repository_dispatch: - types: [benchmark_v2_trigger] jobs: benchmark-v2-default: @@ -47,9 +14,9 @@ jobs: uses: ./.github/workflows/benchmark_v2.yml with: runner: amd-mi325-ci-1gpu - warmup_iterations: ${{ inputs.warmup_iterations || 3 }} - measurement_iterations: ${{ inputs.measurement_iterations || 5 }} - num_tokens_to_generate: ${{ inputs.num_tokens_to_generate || 100 }} + warmup_iterations: 3 + measurement_iterations: 5 + num_tokens_to_generate: 100 commit_sha: ${{ github.sha }} upload_to_hub: true run_id: ${{ github.run_id }} From f0701d7b2258e2379796578bcd39328a774db77d Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Thu, 11 Sep 2025 09:54:04 +0000 Subject: [PATCH 26/31] Remove parametrization from worfkflow files --- .github/workflows/benchmark_v2.yml | 65 ++++-------------------------- 1 file changed, 8 insertions(+), 57 deletions(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 9d66e68b028b..11146be9434b 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -7,26 +7,6 @@ on: description: 'GH Actions runner group to use' required: true type: string - model_id: - description: 'Model ID to benchmark (e.g., meta-llama/Llama-2-7b-hf)' - required: false - type: string - default: '' - warmup_iterations: - description: 'Number of warmup iterations' - required: false - type: number - default: 3 - measurement_iterations: - description: 'Number of measurement iterations' - required: false - type: number - default: 5 - num_tokens_to_generate: - description: 'Number of tokens to generate' - required: false - type: number - default: 100 commit_sha: description: 'Commit SHA to benchmark' required: false @@ -36,7 +16,7 @@ on: description: 'Enable/disable uploading results to a HuggingFace Dataset' required: false type: string - default: '' + default: 'false' run_id: description: 'Custom run ID for organizing results (auto-generated if not provided)' required: false @@ -85,44 +65,15 @@ jobs: python3 -c "import torch; print(f'CUDA device count: {torch.cuda.device_count()}')" || true nvidia-smi || true - - name: Prepare benchmark arguments - id: prepare-args - run: | - args="--log-level INFO" - - # Add model ID if specified - if [ -n "${{ inputs.model_id }}" ]; then - args="$args --model-id '${{ inputs.model_id }}'" - fi - - # Add iterations - args="$args --warmup-iterations ${{ inputs.warmup_iterations }}" - args="$args --measurement-iterations ${{ inputs.measurement_iterations }}" - args="$args --num-tokens-to-generate ${{ inputs.num_tokens_to_generate }}" - - # Add commit ID if available - if [ -n "${{ inputs.commit_sha }}" ]; then - args="$args --commit-id '${{ inputs.commit_sha }}'" - elif [ -n "${{ github.sha }}" ]; then - args="$args --commit-id '${{ github.sha }}'" - fi - - # Add HuggingFace upload parameters if specified - if [ -n "${{ inputs.upload_to_hub }}" ]; then - args="$args --upload-to-hub '${{ inputs.upload_to_hub }}'" - fi - - if [ -n "${{ inputs.run_id }}" ]; then - args="$args --run-id '${{ inputs.run_id }}'" - fi - - echo "benchmark_args=$args" >> $GITHUB_OUTPUT - echo "Benchmark arguments: $args" - - name: Run benchmark v2 working-directory: benchmark_v2 run: | - echo "Running benchmark with args: ${{ steps.prepare-args.outputs.benchmark_args }}" - python3 run_benchmarks.py ${{ steps.prepare-args.outputs.benchmark_args }} + echo "Running benchmarks" + python3 run_benchmarks.py \ + --commit-id '${{ inputs.commit_sha || github.sha }}' \ + --upload-to-hub '${{ inputs.upload_to_hub || false}}' \ + --run-id '${{ inputs.run_id }}' \ + --benchmark-repo-id '${{ inputs.benchmark_repo_id}}' \ + --log-level INFO env: HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} \ No newline at end of file From 738e07e6b8b4bd488c8bdfdb2b0010e314c5c2d4 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Thu, 11 Sep 2025 09:56:15 +0000 Subject: [PATCH 27/31] Fix callers --- .github/workflows/benchmark_v2_a10_caller.yml | 3 --- .github/workflows/benchmark_v2_mi325_caller.yml | 3 --- 2 files changed, 6 deletions(-) diff --git a/.github/workflows/benchmark_v2_a10_caller.yml b/.github/workflows/benchmark_v2_a10_caller.yml index d1f220bc2185..9ff9408f98a2 100644 --- a/.github/workflows/benchmark_v2_a10_caller.yml +++ b/.github/workflows/benchmark_v2_a10_caller.yml @@ -14,9 +14,6 @@ jobs: uses: ./.github/workflows/benchmark_v2.yml with: runner: aws-g5-4xlarge-cache-use1-public-80 - warmup_iterations: 3 - measurement_iterations: 5 - num_tokens_to_generate: 100 commit_sha: ${{ github.sha }} upload_to_hub: true run_id: ${{ github.run_id }} diff --git a/.github/workflows/benchmark_v2_mi325_caller.yml b/.github/workflows/benchmark_v2_mi325_caller.yml index 9476f7bc071e..a6866f56d0db 100644 --- a/.github/workflows/benchmark_v2_mi325_caller.yml +++ b/.github/workflows/benchmark_v2_mi325_caller.yml @@ -14,9 +14,6 @@ jobs: uses: ./.github/workflows/benchmark_v2.yml with: runner: amd-mi325-ci-1gpu - warmup_iterations: 3 - measurement_iterations: 5 - num_tokens_to_generate: 100 commit_sha: ${{ github.sha }} upload_to_hub: true run_id: ${{ github.run_id }} From 00a4e1f9d44166453b7c3e14b05b2a262b0a33d9 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Mon, 15 Sep 2025 09:56:53 +0000 Subject: [PATCH 28/31] Change push trigger to pull_request + label --- .github/workflows/benchmark_v2.yml | 2 ++ .github/workflows/benchmark_v2_a10_caller.yml | 5 ++--- .github/workflows/benchmark_v2_mi325_caller.yml | 5 ++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 11146be9434b..ce3242f8919a 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -39,6 +39,8 @@ jobs: benchmark-v2: name: Benchmark v2 runs-on: ${{ inputs.runner }} + if: | + (github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark')) container: image: huggingface/transformers-pytorch-gpu options: --gpus all --privileged --ipc host --shm-size "16gb" diff --git a/.github/workflows/benchmark_v2_a10_caller.yml b/.github/workflows/benchmark_v2_a10_caller.yml index 9ff9408f98a2..30b5e8be78a5 100644 --- a/.github/workflows/benchmark_v2_a10_caller.yml +++ b/.github/workflows/benchmark_v2_a10_caller.yml @@ -4,9 +4,8 @@ on: schedule: # Run daily at 16:30 UTC - cron: "30 16 * * *" - push: - branches: - - run-benchmarking-gh-actions* + pull_request: + types: [ opened, labeled, reopened, synchronize ] jobs: benchmark-v2-default: diff --git a/.github/workflows/benchmark_v2_mi325_caller.yml b/.github/workflows/benchmark_v2_mi325_caller.yml index a6866f56d0db..60bdb6fa3e85 100644 --- a/.github/workflows/benchmark_v2_mi325_caller.yml +++ b/.github/workflows/benchmark_v2_mi325_caller.yml @@ -4,9 +4,8 @@ on: schedule: # Run daily at 16:30 UTC - cron: "30 16 * * *" - push: - branches: - - run-benchmarking-gh-actions* + pull_request: + types: [ opened, labeled, reopened, synchronize ] jobs: benchmark-v2-default: From b8b7f5f7c1b93d033f8aa507cfb20f31aa49180c Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Mon, 15 Sep 2025 14:57:22 +0000 Subject: [PATCH 29/31] Add back schedule event --- .github/workflows/benchmark_v2.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index ce3242f8919a..44ecec72c0da 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -40,7 +40,8 @@ jobs: name: Benchmark v2 runs-on: ${{ inputs.runner }} if: | - (github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark')) + (github.event_name == 'pull_request' && contains( github.event.pull_request.labels.*.name, 'run-benchmark')) || + (github.event_name == 'schedule') container: image: huggingface/transformers-pytorch-gpu options: --gpus all --privileged --ipc host --shm-size "16gb" From e2b5c4e819920600939ff59a477c62a2f786732e Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Tue, 16 Sep 2025 15:34:54 +0000 Subject: [PATCH 30/31] Push to the same dataset --- .github/workflows/benchmark_v2_mi325_caller.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark_v2_mi325_caller.yml b/.github/workflows/benchmark_v2_mi325_caller.yml index 60bdb6fa3e85..95fbeb5e5f6a 100644 --- a/.github/workflows/benchmark_v2_mi325_caller.yml +++ b/.github/workflows/benchmark_v2_mi325_caller.yml @@ -16,5 +16,5 @@ jobs: commit_sha: ${{ github.sha }} upload_to_hub: true run_id: ${{ github.run_id }} - benchmark_repo_id: optimum-amd/transformers-daily-benchmarks + benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks secrets: inherit \ No newline at end of file From 26de086ce8dce1131f0d027f1255f7d8884bc7d9 Mon Sep 17 00:00:00 2001 From: Akos Hadnagy Date: Fri, 19 Sep 2025 08:30:10 +0000 Subject: [PATCH 31/31] Simplify parameter description --- .github/workflows/benchmark_v2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml index 44ecec72c0da..350ad0144101 100644 --- a/.github/workflows/benchmark_v2.yml +++ b/.github/workflows/benchmark_v2.yml @@ -13,7 +13,7 @@ on: type: string default: '' upload_to_hub: - description: 'Enable/disable uploading results to a HuggingFace Dataset' + description: 'Uploading results to a HuggingFace Dataset' required: false type: string default: 'false'