diff --git a/.github/workflows/benchmark_v2.yml b/.github/workflows/benchmark_v2.yml
new file mode 100644
index 000000000000..350ad0144101
--- /dev/null
+++ b/.github/workflows/benchmark_v2.yml
@@ -0,0 +1,87 @@
+name: Benchmark v2 Framework
+
+on:
+  workflow_call:
+    inputs:
+      runner:
+        description: 'GH Actions runner group to use'
+        required: true
+        type: string
+      commit_sha:
+        description: 'Commit SHA to benchmark'
+        required: false
+        type: string
+        default: ''
+      upload_to_hub:
+        description: 'Whether to upload results to a HuggingFace Dataset'
+        required: false
+        type: string
+        default: 'false'
+      run_id:
+        description: 'Custom run ID for organizing results (auto-generated if not provided)'
+        required: false
+        type: string
+        default: ''
+      benchmark_repo_id:
+        description: 'HuggingFace Dataset to upload results to (e.g., "org/benchmark-results")'
+        required: false
+        type: string
+        default: ''
+
+env:
+  HF_HOME: /mnt/cache
+  TRANSFORMERS_IS_CI: yes
+  # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
+  # This token is created under the bot `hf-transformers-bot`.
+  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+
+jobs:
+  benchmark-v2:
+    name: Benchmark v2
+    runs-on: ${{ inputs.runner }}
+    if: |
+      (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'run-benchmark')) ||
+      (github.event_name == 'schedule')
+    container:
+      image: huggingface/transformers-pytorch-gpu
+      options: --gpus all --privileged --ipc host --shm-size "16gb"
+    steps:
+      - name: Get repo
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.commit_sha || github.sha }}
+
+      - name: Install benchmark dependencies
+        run: |
+          python3 -m pip install -r benchmark_v2/requirements.txt
+
+      - name: Reinstall transformers in edit mode
+        run: |
+          python3 -m pip uninstall -y transformers
+          python3 -m pip install -e ".[torch]"
+
+      - name: Show installed libraries and their versions
+        run: |
+          python3 -m pip list
+          python3 -c "import torch; print(f'PyTorch version: {torch.__version__}')"
+          python3 -c "import torch; print(f'CUDA available: {torch.cuda.is_available()}')"
+          python3 -c "import torch; print(f'CUDA device count: {torch.cuda.device_count()}')" || true
+          nvidia-smi || true
+
+      - name: Run benchmark v2
+        working-directory: benchmark_v2
+        run: |
+          echo "Running benchmarks"
+          # run_benchmarks.py only understands --upload-to-hub <dataset name>; build the flag
+          # from the two workflow inputs instead of passing an unsupported --benchmark-repo-id.
+          UPLOAD_ARGS=""
+          if [ "${{ inputs.upload_to_hub }}" = "true" ] && [ -n "${{ inputs.benchmark_repo_id }}" ]; then
+            UPLOAD_ARGS="--upload-to-hub ${{ inputs.benchmark_repo_id }}"
+          fi
+          python3 run_benchmarks.py \
+            --commit-id '${{ inputs.commit_sha || github.sha }}' \
+            --run-id '${{ inputs.run_id }}' \
+            --log-level INFO \
+            $UPLOAD_ARGS
+        env:
+          HF_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/benchmark_v2_a10_caller.yml b/.github/workflows/benchmark_v2_a10_caller.yml
new file mode 100644
index 000000000000..30b5e8be78a5
--- /dev/null
+++ b/.github/workflows/benchmark_v2_a10_caller.yml
@@ -0,0 +1,20 @@
+name: Benchmark v2 Scheduled Runner - A10 Single-GPU
+
+on:
+  schedule:
+    # Run daily at 16:30 UTC
+    - cron: "30 16 * * *"
+  pull_request:
+    types: [ opened, labeled, reopened, synchronize ]
+
+jobs:
+  benchmark-v2-default:
+    name: Benchmark v2 - Default Models
+    uses: ./.github/workflows/benchmark_v2.yml
+    with:
+      runner: aws-g5-4xlarge-cache-use1-public-80
+      commit_sha: ${{ github.sha }}
+      upload_to_hub: 'true'
+      run_id: ${{ github.run_id }}
+      benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks
+    secrets: inherit
\ No newline at end of file
diff --git a/.github/workflows/benchmark_v2_mi325_caller.yml b/.github/workflows/benchmark_v2_mi325_caller.yml
new file mode 100644
index 000000000000..95fbeb5e5f6a
--- /dev/null
+++ b/.github/workflows/benchmark_v2_mi325_caller.yml
@@ -0,0 +1,20 @@
+name: Benchmark v2 Scheduled Runner - MI325 Single-GPU
+
+on:
+  schedule:
+    # Run daily at 16:30 UTC
+    - cron: "30 16 * * *"
+  pull_request:
+    types: [ opened, labeled, reopened, synchronize ]
+
+jobs:
+  benchmark-v2-default:
+    name: Benchmark v2 - Default Models
+    uses: ./.github/workflows/benchmark_v2.yml
+    with:
+      runner: amd-mi325-ci-1gpu
+      commit_sha: ${{ github.sha }}
+      upload_to_hub: 'true'
+      run_id: ${{ github.run_id }}
+      benchmark_repo_id: hf-internal-testing/transformers-daily-benchmarks
+    secrets: inherit
\ No newline at end of file
diff --git a/benchmark_v2/README.md b/benchmark_v2/README.md
index 9a0102b387fc..1d34de6408c7 100644
--- a/benchmark_v2/README.md
+++ b/benchmark_v2/README.md
@@ -21,6 +21,52 @@ python run_benchmarks.py \
     --num-tokens-to-generate 200
 ```
 
+### Uploading Results to HuggingFace Dataset
+
+You can automatically upload benchmark results to a HuggingFace Dataset for tracking and analysis:
+
+```bash
+# Upload to a public dataset with auto-generated run ID
+python run_benchmarks.py --upload-to-hub username/benchmark-results
+
+# Upload with a custom run ID for easy identification
+python run_benchmarks.py --upload-to-hub username/benchmark-results --run-id experiment_v1
+```
+
+**Dataset Directory Structure:**
+```
+dataset_name/
+├── 2025-01-15/
+│   ├── runs/                                # Non-scheduled runs (manual, PR, etc.)
+│   │   └── 123-1245151651/                  # GitHub run number and ID
+│   │       └── benchmark_results/
+│   │           ├── benchmark_summary_20250115_143022.json
+│   │           └── model-name/
+│   │               └── model-name_benchmark_20250115_143022.json
+│   └── abc123de/                            # Scheduled runs (daily CI), keyed by run ID
+│       └── benchmark_results/
+│           ├── benchmark_summary_20250115_143022.json
+│           └── model-name/
+│               └── model-name_benchmark_20250115_143022.json
+└── 2025-01-16/
+    └── ...
+```
+
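+To pull a day's results back down for analysis, `huggingface_hub` can download just that
+folder (a sketch; the dataset name and date below are placeholders for your own setup):
+
+```python
+from huggingface_hub import snapshot_download
+
+# Download only one day's benchmark files from the results dataset
+local_dir = snapshot_download(
+    repo_id="username/benchmark-results",  # placeholder dataset name
+    repo_type="dataset",
+    allow_patterns=["2025-01-15/*"],
+)
+print(local_dir)
+```
+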
 ### Running Specific Benchmarks
 
 ```bash
diff --git a/benchmark_v2/benches/llama.py b/benchmark_v2/benches/llama.py
index 23427a8549c7..2349e75f1347 100644
--- a/benchmark_v2/benches/llama.py
+++ b/benchmark_v2/benches/llama.py
@@ -20,7 +20,6 @@
 
 from benchmark_framework import ModelBenchmark
 
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "1"
 torch.set_float32_matmul_precision("high")
 
diff --git a/benchmark_v2/requirements.txt b/benchmark_v2/requirements.txt
index a7a435958cf7..e4dcbb3eb7ef 100644
--- a/benchmark_v2/requirements.txt
+++ b/benchmark_v2/requirements.txt
@@ -3,4 +3,5 @@ psutil>=5.8.0
 gpustat>=1.0.0
 torch>=2.0.0
 transformers>=4.30.0
-datasets>=2.10.0
\ No newline at end of file
+datasets>=2.10.0
+huggingface_hub>=0.16.0
\ No newline at end of file
diff --git a/benchmark_v2/run_benchmarks.py b/benchmark_v2/run_benchmarks.py
index 26c816b9d16d..44f6515a2c30 100755
--- a/benchmark_v2/run_benchmarks.py
+++ b/benchmark_v2/run_benchmarks.py
@@ -24,6 +24,7 @@
 import logging
 import os
 import sys
+import uuid
 from datetime import datetime
 from pathlib import Path
 from typing import Any, Optional
@@ -160,7 +161,12 @@ def run_single_benchmark(
     return None
 
 
-def generate_summary_report(output_dir: str, benchmark_results: dict[str, Any], logger: logging.Logger) -> str:
+def generate_summary_report(
+    output_dir: str,
+    benchmark_results: dict[str, Any],
+    logger: logging.Logger,
+    benchmark_run_uuid: Optional[str] = None,
+) -> str:
     """Generate a summary report of all benchmark runs."""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
     summary_file = os.path.join(output_dir, f"benchmark_summary_{timestamp}.json")
@@ -168,6 +174,7 @@ def generate_summary_report(output_dir: str, benchmark_results: dict[str, Any],
     summary_data = {
         "run_metadata": {
             "timestamp": datetime.utcnow().isoformat(),
+            "benchmark_run_uuid": benchmark_run_uuid,
             "total_benchmarks": len(benchmark_results),
             "successful_benchmarks": len([r for r in benchmark_results.values() if r is not None]),
             "failed_benchmarks": len([r for r in benchmark_results.values() if r is None]),
@@ -183,9 +190,118 @@ def generate_summary_report(output_dir: str, benchmark_results: dict[str, Any],
     return summary_file
 
 
+def upload_results_to_hf_dataset(
+    output_dir: str,
+    summary_file: str,
+    dataset_name: str,
+    run_id: Optional[str] = None,
+    logger: Optional[logging.Logger] = None,
+) -> Optional[str]:
+    """
+    Upload benchmark results to a HuggingFace Dataset.
+    Based on upload_collated_report() from utils/collated_reports.py
+    Args:
+        output_dir: Local output directory containing results
+        summary_file: Path to the summary file
+        dataset_name: Name of the HuggingFace dataset to upload to
+        run_id: Unique run identifier (if None, derived from the GitHub Actions run number and ID when available)
+        logger: Logger instance
+    Returns:
+        The run_id used for the upload, None if upload failed
+    """
+    if logger is None:
+        logger = logging.getLogger(__name__)
+
+    import os
+
+    from huggingface_hub import HfApi
+
+    api = HfApi()
+
+    if run_id is None:
+        github_run_number = os.getenv("GITHUB_RUN_NUMBER")
+        github_run_id = os.getenv("GITHUB_RUN_ID")
+        if github_run_number and github_run_id:
+            run_id = f"{github_run_number}-{github_run_id}"
+
+    date_folder = datetime.now().strftime("%Y-%m-%d")
+
+    github_event_name = os.getenv("GITHUB_EVENT_NAME")
+    if github_event_name != "schedule":
+        # Non-scheduled runs go under a runs subfolder
+        repo_path = f"{date_folder}/runs/{run_id}/benchmark_results"
+    else:
+        # Scheduled runs go directly under the date
+        repo_path = f"{date_folder}/{run_id}/benchmark_results"
+
+    logger.info(f"Uploading benchmark results to dataset '{dataset_name}' at path '{repo_path}'")
+
+    try:
+        # Get the authentication token (prioritize specific token, fallback to HF_TOKEN)
+        token = os.getenv("TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN") or os.getenv("HF_TOKEN")
+
+        # Upload all files in the output directory
+        from pathlib import Path
+
+        output_path = Path(output_dir)
+
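+        # Each file below is pushed as its own commit on the dataset; if that ever
+        # becomes noisy, huggingface_hub's upload_folder() could push the whole
+        # directory in a single commit instead of looping over files.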
+        for file_path in output_path.rglob("*"):
+            if file_path.is_file():
+                # Calculate relative path from output_dir
+                relative_path = file_path.relative_to(output_path)
+                path_in_repo = f"{repo_path}/{relative_path}"
+
+                logger.debug(f"Uploading {file_path} to {path_in_repo}")
+
+                api.upload_file(
+                    path_or_fileobj=str(file_path),
+                    path_in_repo=path_in_repo,
+                    repo_id=dataset_name,
+                    repo_type="dataset",
+                    token=token,
+                    commit_message=f"Upload benchmark results for run {run_id}",
+                )
+
+        logger.info(
+            f"Successfully uploaded results to: https://huggingface.co/datasets/{dataset_name}/tree/main/{repo_path}"
+        )
+
+        return run_id
+
+    except Exception as upload_error:
+        logger.error(f"Failed to upload results: {upload_error}")
+        import traceback
+
+        logger.debug(traceback.format_exc())
+        return None
+
+
 def main():
     """Main entry point for the benchmarking script."""
-    parser = argparse.ArgumentParser(description="Run all benchmarks in the ./benches directory")
+    # Generate a unique UUID for this benchmark run
+    benchmark_run_uuid = str(uuid.uuid4())[:8]
+
+    parser = argparse.ArgumentParser(
+        description="Run all benchmarks in the ./benches directory",
+        epilog="""
+Examples:
+  # Run all available benchmarks
+  python3 run_benchmarks.py
+
+  # Run with specific model and upload to HuggingFace Dataset
+  python3 run_benchmarks.py --model-id meta-llama/Llama-2-7b-hf --upload-to-hub username/benchmark-results
+
+  # Run with custom run ID and upload to HuggingFace Dataset
+  python3 run_benchmarks.py --run-id experiment_v1 --upload-to-hub org/benchmarks
+
+  # Run only specific benchmarks with file logging
+  python3 run_benchmarks.py --include llama --enable-file-logging
+        """,  # noqa: W293
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
 
     parser.add_argument(
         "--output-dir",
@@ -228,20 +341,29 @@ def main():
 
     parser.add_argument("--exclude", type=str, nargs="*", help="Exclude benchmarks matching these names")
 
-    parser.add_argument("--enable-mock", action="store_true", help="Enable mock benchmark (skipped by default)")
-
parser.add_argument("--enable-file-logging", action="store_true", help="Enable file logging (disabled by default)") parser.add_argument( "--commit-id", type=str, help="Git commit ID for metadata (if not provided, will auto-detect from git)" ) + parser.add_argument( + "--upload-to-hub", + type=str, + help="Upload results to HuggingFace Dataset (provide dataset name, e.g., 'username/benchmark-results')", + ) + + parser.add_argument( + "--run-id", type=str, help="Custom run ID for organizing results (if not provided, will generate a unique ID)" + ) + args = parser.parse_args() # Setup logging logger = setup_logging(args.log_level, args.enable_file_logging) logger.info("Starting benchmark discovery and execution") + logger.info(f"Benchmark run UUID: {benchmark_run_uuid}") logger.info(f"Output directory: {args.output_dir}") logger.info(f"Benches directory: {args.benches_dir}") @@ -286,9 +408,6 @@ def main(): if args.model_id: benchmark_kwargs["model_id"] = args.model_id - # Add enable_mock flag for mock benchmark - benchmark_kwargs["enable_mock"] = args.enable_mock - # Add commit_id if provided if args.commit_id: benchmark_kwargs["commit_id"] = args.commit_id @@ -306,7 +425,27 @@ def main(): successful_count += 1 # Generate summary report - summary_file = generate_summary_report(args.output_dir, benchmark_results, logger) + summary_file = generate_summary_report(args.output_dir, benchmark_results, logger, benchmark_run_uuid) + + # Upload results to HuggingFace Dataset if requested + upload_run_id = None + if args.upload_to_hub: + logger.info("=" * 60) + logger.info("UPLOADING TO HUGGINGFACE DATASET") + logger.info("=" * 60) + # Use provided run_id or fallback to benchmark run UUID + effective_run_id = args.run_id or benchmark_run_uuid + upload_run_id = upload_results_to_hf_dataset( + output_dir=args.output_dir, + summary_file=summary_file, + dataset_name=args.upload_to_hub, + run_id=effective_run_id, + logger=logger, + ) + if upload_run_id: + logger.info(f"Upload completed with run ID: {upload_run_id}") + else: + logger.warning("Upload failed - continuing with local results") # Final summary total_benchmarks = len(filtered_benchmarks) @@ -321,6 +460,16 @@ def main(): logger.info(f"Output directory: {args.output_dir}") logger.info(f"Summary report: {summary_file}") + if args.upload_to_hub: + if upload_run_id: + logger.info(f"HuggingFace Dataset: {args.upload_to_hub}") + logger.info(f"Run ID: {upload_run_id}") + logger.info( + f"View results: https://huggingface.co/datasets/{args.upload_to_hub}/tree/main/{datetime.now().strftime('%Y-%m-%d')}/runs/{upload_run_id}" + ) + else: + logger.warning("Upload to HuggingFace Dataset failed") + if failed_count > 0: logger.warning(f"{failed_count} benchmark(s) failed. Check logs for details.") return 1