Merged
Changes from all commits (824 commits)
4668ef1
Update notification service MI325 (#40078)
ivarflakstad Aug 12, 2025
3ff2e98
Fix PerceptionLM image preprocessing for non-tiled image input. (#40006)
shuminghu Aug 12, 2025
86bb1fc
Revert FA2 kwargs construction (#40029)
zucchini-nlp Aug 12, 2025
c6fbfab
[fix] batch inference for llava_onevision (#40021)
cyr0930 Aug 12, 2025
913c0a8
[docs] Zero Shot Object Detection Task (#40096)
ariG23498 Aug 12, 2025
1c5e17c
Update Glm4V processor and add tests (#39988)
zucchini-nlp Aug 12, 2025
f6b6e17
Add glm4.5&&glm4.5V doc (#40095)
lambertwjh Aug 12, 2025
4b3a1a6
Causal loss for `ForConditionalGeneration` (#39973)
qgallouedec Aug 12, 2025
ab455e0
Audio encodings now match conv2d weight dtype in Gemma3nAudioSSCPConv…
Malav-P Aug 12, 2025
41d1717
New DynamicSlidingWindowLayer & associated Cache (#40039)
Cyrilvallez Aug 12, 2025
952fac1
Enable SIM rules (#39806)
cyyever Aug 12, 2025
a07b5e9
feat: add `is_fast` to ImageProcessor (#39603)
MilkClouds Aug 12, 2025
b1b4655
Re-apply make style (#40106)
Cyrilvallez Aug 12, 2025
35dc888
Replace `logger.warning` with `logger.warning_once` in `GradientCheck…
qgallouedec Aug 12, 2025
f7cbd5f
Fix regression in mllama vision encoder (#40083)
Isotr0py Aug 12, 2025
2ce0dae
Switch the order of args in StaticCache (for BC and future logic) (#4…
Cyrilvallez Aug 12, 2025
085e023
Fix Qwen3 MoE GGUF architecture mismatch (#39976)
ctcanbol Aug 12, 2025
a5fac1c
Fix error on importing unavailable torch.distributed (#40038)
m-gallus Aug 12, 2025
b6ba595
Default to dequantize if cpu in device_map for mxfp4 (#39993)
MekkCyber Aug 12, 2025
9977cf1
[`Flash Attention`] Fix flash attention integration (#40002)
vasqu Aug 12, 2025
83dbebc
[trainer] ensure special tokens in model configs are aligned with tok…
gante Aug 12, 2025
0ce24f5
Fix Causality Handling in Flash Attention to Support Bidirectional At…
lucaswychan Aug 12, 2025
e5e73e4
[docs] Add reference to HF-maintained `custom_generate` collections (…
gante Aug 12, 2025
a1a4fcd
Add model card for MobileViT (#40033)
Shivamjan Aug 12, 2025
31ab716
remove sequence parallel in llama4 (#40084)
3outeille Aug 12, 2025
85d536a
🌐 [i18n-KO] Translated `tiny_agents.md` to Korean (#39913)
AhnJoonSung Aug 13, 2025
849c377
[bugfix] Fix tensor device in Idefics2, Idefics3, and SmolVLM (#39975)
qgallouedec Aug 13, 2025
060b86e
changed xLSTMRMSNorm to RMSNorm (#40113)
nikitazuevblago Aug 13, 2025
34a1fc6
Fix QuantoQuantizedCache import issues (#40109)
manueldeprada Aug 13, 2025
8d19231
[serve] allow array `content` inputs for LLMs (#39829)
gante Aug 13, 2025
e78571f
`decoding_method` argument in generate (#40085)
manueldeprada Aug 13, 2025
ebceef3
Collated reports (#40080)
ivarflakstad Aug 13, 2025
8ef5cd6
DOCS: Add missing space in SECURITY.md (#40087)
shivaheidari Aug 13, 2025
11537c3
[trainer] handle case where EOS token is None in `generation_config` …
gante Aug 13, 2025
f445cae
Fix hidden torchvision>=0.15 dependency issue (#39928)
yonigozlan Aug 13, 2025
4868445
🌐 [i18n-KO] Translated `main_classes/processors.md` to Korean (#39519)
TaskerJang Aug 13, 2025
9e21e50
🌐 [i18n-KO] Translated `jamba.md` to Korean (#39890)
skwh54 Aug 13, 2025
e4223fa
🌐 [i18n-KO] Translated `main_classes/optimizer_schedules.md` to Korea…
luckyvickyricky Aug 13, 2025
5337f30
🚨🚨 [generate] ignore `cache_implementation="hybrid"` hub defaults (#…
gante Aug 13, 2025
ac52c77
🌐 [i18n-KO] Translated `gpt2.md` to Korean (#39808)
taemincode Aug 13, 2025
127e33f
🌐 [i18n-KO] Translated `optimizers.md` to Korean (#40011)
chelsseeey Aug 13, 2025
6b728f1
🌐 [i18n-KO] Translated grounding-dino.md to Korean (#39861)
TaskerJang Aug 13, 2025
20c6b47
🚨 Use lru_cache for sine pos embeddings MaskFormer (#40007)
yonigozlan Aug 13, 2025
ab91085
🌐 [i18n-KO] Translated `pipelines.md` to Korean (#39577)
xhaktm00 Aug 13, 2025
bec6926
gpt oss is important (#40139)
ArthurZucker Aug 13, 2025
25ad9c8
Fix Janus (#40140)
Cyrilvallez Aug 13, 2025
68a13cd
Add Segment Anything 2 (SAM2) (#32317)
SangbumChoi Aug 13, 2025
eb5768a
[docs] Fix ko toctree (#40138)
stevhliu Aug 13, 2025
412c9c3
Remove an old badly designed test (#40142)
Cyrilvallez Aug 13, 2025
0f9c259
updated visualBERT modelcard (#40057)
Anil-Red Aug 13, 2025
e651ae0
🌐 [i18n-KO] Translated `gemma3.md` to Korean (#39865)
seopp Aug 13, 2025
12e49cd
Fix quantized cache with only cache_implementation in generate (#40144)
Cyrilvallez Aug 13, 2025
591708d
Add pytest marker: `torch_compile_test` and `torch_export_test` (#39950)
ydshieh Aug 13, 2025
be1ab51
Update Dockerfiles to install packages inside a virtual environment (…
Sai-Suraj-27 Aug 13, 2025
e446372
Create self-scheduled-amd-mi355-caller.yml (#40134)
glegendre01 Aug 13, 2025
252364f
[Cohere2Vision] remove unused arg (#40103)
zucchini-nlp Aug 14, 2025
22e89e5
[efficientloftr] fix bugs and follow original cross attn implementati…
sbucaille Aug 14, 2025
c47544b
Fix CI: Use correct import in SAM for torchvision InterpolationMode (…
manueldeprada Aug 14, 2025
cfe52ff
[Continous Batching] set head_dim when config.head_dim is None (#40159)
kashif Aug 14, 2025
1c5d2f7
Replace `self.tokenizer` by `self.processing_class` (#40119)
qgallouedec Aug 14, 2025
eba1d62
[FA2] Fix it finally - revert fa kwargs preparation (#40161)
Cyrilvallez Aug 14, 2025
41980ce
[bugfix] fix flash-attention2 unavailable error for Ascend NPU (#40151)
FightingZhen Aug 14, 2025
6f259bc
Fix docs typo (#40167)
qubvel Aug 14, 2025
b834cb8
build: Add fast image processor tvp (#39529)
adutchengineer Aug 14, 2025
2b6cbed
Add GptOssForSequenceClassification for GPT-OSS models (#40043)
zyfedward Aug 14, 2025
8a658ac
Standardize BARTpho model card: badges, new examples, fixed broken im…
eshwanthkartitr Aug 14, 2025
b02f2d8
Add dates to the model docs (#39320)
MHRDYN7 Aug 14, 2025
31b6e6e
Pin torch to 2.7.1 on CircleCI for now (#40174)
ydshieh Aug 14, 2025
52c6c1b
Update dynamic attnt setter for multimodals (#39908)
zucchini-nlp Aug 14, 2025
85fce2e
[MINOR:TYPO] Update base.py (#40169)
cakiki Aug 15, 2025
cc99978
make model doc device agnostic (#40143)
yao-matrix Aug 15, 2025
4912d5b
fix to avoid modifying a view in place (#40162)
3outeille Aug 15, 2025
4211756
Fix fsdp for generic-task models (#40191)
Cyrilvallez Aug 15, 2025
5068fcd
Add repr to EncoderDecoderCache (#40195)
Cyrilvallez Aug 15, 2025
c167faa
Fix typos (#40175)
cyyever Aug 15, 2025
c7afaa5
Remove _prepare_flash_attention_from_position_ids (#40069)
cyyever Aug 15, 2025
ec85d2c
Avoid CUDA stream sync (#40060)
cyyever Aug 15, 2025
28a03fb
Fix various Pylint warnings (#40107)
cyyever Aug 15, 2025
de437d0
Update: add type hints to check_tokenizers.py (#40094)
ajeet214 Aug 15, 2025
29e4e35
Benchmarking improvements (#39768)
ahadnagy Aug 15, 2025
3f4c85f
Add X-Codec model (#38248)
Manalelaidouni Aug 15, 2025
05000ae
Fix GPT-OSS `swiglu_limit` not passed in for MXFP4 (#40197)
danielhanchen Aug 15, 2025
cd22550
docs: Update LayoutLM model card according to new standardized format…
Jin-HoMLee Aug 15, 2025
2914cec
Revert "Pin torch to 2.7.1 on CircleCI for now" + Final fix for `too …
ydshieh Aug 18, 2025
6ce8f05
Use correct `model_input_names` for PixtralImageProcessor (#40226)
rohitrango Aug 18, 2025
eb2f9da
fix error vocab_size at Qwen2_5_VLForConditionalGeneration loss_funct…
killight98 Aug 18, 2025
e5886f9
[SAM 2] Change checkpoints in docs and tests (#40213)
yonigozlan Aug 18, 2025
6333eb9
Fix more typos (#40212)
cyyever Aug 18, 2025
e4bd2c8
Fix ESM token_dropout crash when using inputs_embeds instead of input…
notkisk Aug 18, 2025
2fe4337
AMD scheduled CI ref env file (#40243)
ivarflakstad Aug 18, 2025
47938f8
Add Ovis2 model and processor implementation (#37088)
thisisiron Aug 18, 2025
57e230c
Fix more pylint warnings (#40204)
cyyever Aug 18, 2025
a36d51e
🚨 Always return Cache objects in modelings (to align with generate) (…
manueldeprada Aug 18, 2025
f417a1a
remove transpose_for_scores call in ESM-2 (#40210)
pstjohn Aug 18, 2025
00b4dfb
Add `chat_template` (`jinja2`) as an extra dependency (#40128)
tboerstad Aug 18, 2025
7a0ba0d
[typing] fix type annotation error in DepthPro model image processor …
MengAiDev Aug 18, 2025
d6fad86
[serve] guard imports (#39825)
gante Aug 18, 2025
aa45824
[`CI`] Fix repo consistency (#40249)
vasqu Aug 18, 2025
2bcf9f6
Fixes for EncoderDecoderCache (#40008)
remi-or Aug 18, 2025
01c03bf
fix: Catch correct ConnectionError for additional_chat_templates (#39…
akug Aug 18, 2025
a7eabf1
Model card for NLLB (#40074)
sahil-kabir Aug 18, 2025
5986220
Correct typo and update notes in docs Readme (#40234)
PavloFesenko Aug 18, 2025
e472efb
Fix benchmark workflow (#40254)
ahadnagy Aug 18, 2025
6b5bd11
docs: Update OLMo model card (#40233)
rafakatri Aug 18, 2025
debc92e
Skip broken tests (#40157)
zucchini-nlp Aug 19, 2025
28746cd
Remove MI300 CI (#40270)
ivarflakstad Aug 19, 2025
5d9a715
set inputs_embeds to None while generate to avoid audio encoder forwa…
BakerBunker Aug 19, 2025
56c4421
[detection] fix attention mask for RT-DETR-based models (#40269)
materight Aug 19, 2025
2b59207
Fix slow static cache export tests (#40261)
jackzhxng Aug 19, 2025
a2e76b9
🚨🚨 Switch default compilation to fullgraph=False (#40137)
Cyrilvallez Aug 19, 2025
2f1a8ad
Fix setting attention for multimodal models (#39984)
zucchini-nlp Aug 19, 2025
c93594e
[detection] fix correct `k_proj` weight and bias slicing in D-FINE (#…
notkisk Aug 19, 2025
5b3b7ea
Add Kosmos-2.5 (#31711)
tic-top Aug 19, 2025
57bb6db
Skipping pytree registration in case fsdp is enabled (#40075)
romitjain Aug 19, 2025
249d7c6
Update image_processing_perception_lm_fast.py to allow for proper ove…
tyleryzhu Aug 19, 2025
bebeccb
fix which routing method (#40283)
ArthurZucker Aug 19, 2025
8636b30
Fix chat CLI GPU loading and request_id validation issues (#40230) (#…
robin-ede Aug 19, 2025
bd96e1e
docs(layoutlm): add missing `id=usage` to `<hfoptions>` tag in Layout…
Jin-HoMLee Aug 19, 2025
46d3854
Standardize RAG model card (#40222)
aayush226 Aug 19, 2025
3a4b275
docs: Update TrOCR model card to new format (#40240)
AceHunterr Aug 19, 2025
92f40da
Update model card for gpt neox japanese (#39862)
ahnjj Aug 19, 2025
6ceb13f
SmolVLM and InternVL: Ensure pixel values are converted to the correc…
qgallouedec Aug 19, 2025
0f9ce43
Standardize BertGeneration model card (#40250)
nemitha2005 Aug 19, 2025
4c01746
Adjust ROCm test output expectations (#40279)
ahadnagy Aug 19, 2025
42fe769
SmolVLM test fixes (#40275)
ahadnagy Aug 19, 2025
eaa48c8
make model docs device agnostic (2) (#40256)
yao-matrix Aug 19, 2025
0f9c908
[3/3] make docs device agnostic, all en docs for existing models done…
yao-matrix Aug 20, 2025
1d46091
Add MetaCLIP 2 (#39826)
NielsRogge Aug 20, 2025
126bc03
Allow to be able to run `torch.compile` tests with `fullgraph=True` (…
ydshieh Aug 20, 2025
a4e1fee
[`FA`] Fix dtype in varlen with position ids (#40295)
vasqu Aug 20, 2025
da9452a
[docs] delete more TF/Flax docs (#40289)
gante Aug 20, 2025
d0f1a6e
Clean up X-Codec. (#40271)
ebezzam Aug 20, 2025
a5f0b50
Remove OTel SDK dependencies (#40305)
anuraaga Aug 20, 2025
a01f38b
Fix GOT-OCR2 and Cohere2Vision image processor patches caculation (#4…
Isotr0py Aug 20, 2025
ca0aaa8
[`fix`] Pass adamw optimizer parameters to StableAdamW (#40184)
emapco Aug 20, 2025
3128db6
chore: fix typo in `find_executable_batch_size` to match new 0.9 rati…
MilkClouds Aug 20, 2025
7d2aa5d
:rotating_light: [`Flash Attention`] Fix sliding window size (#40163)
vasqu Aug 20, 2025
959239d
Remove unnecessary contiguous calls for modern torch (#40315)
Rocketknight1 Aug 20, 2025
ca543f8
Add support for Florence-2 (#38188)
ducviet00 Aug 20, 2025
a97213d
Qwen2.5-Omni test fixes (#40307)
ahadnagy Aug 20, 2025
c50f140
Add back `_tp_plan` attribute (#39944)
rishub-tamirisa Aug 20, 2025
2df0c32
byebye torch 2.1 (#40317)
Rocketknight1 Aug 20, 2025
3b72301
No more `natten` (#40287)
ydshieh Aug 20, 2025
4977ec2
[`GPT OSS`] Refactor the tests as it was not properly checking the ou…
ArthurZucker Aug 20, 2025
5d90674
Update CI with nightly torch workflow file (#40306)
ydshieh Aug 20, 2025
139cd91
Fix: Apply `get_placeholder_mask` in Ovis2 (#40280)
thisisiron Aug 20, 2025
1054494
Update notification service amd_daily_ci_workflows definition (#40314)
ivarflakstad Aug 20, 2025
242bb2c
One cache class to rule them all (#40276)
Cyrilvallez Aug 20, 2025
c2e3cc2
Fix chunked attention mask with left-padding (#40324)
Cyrilvallez Aug 21, 2025
c99ed49
[docs] remove flax references from `/en/model_doc` (#40311)
gante Aug 21, 2025
022af24
Fix qwen-omni processor text only mode (#40336)
yuekaizhang Aug 21, 2025
1e2e28f
Change Qwen2RMSNorm to RMSNorm from PyTorch (#40066)
cyyever Aug 21, 2025
adf84ae
Add DeepseekV3ForSequenceClassification for Deepseek V3 models (#40200)
abdokaseb Aug 21, 2025
6ad7f29
Fix deprecation warning version (#40343)
Cyrilvallez Aug 21, 2025
7b060e5
Add missing arguments to class constructors (#40068)
cyyever Aug 21, 2025
c031f6f
[docs] remove TF references from `/en/model_doc` (#40344)
gante Aug 21, 2025
5c88d8f
Fix: Only call Trainer.align_special_tokens if model has "config" att…
tomaarsen Aug 21, 2025
e95441b
add type hints (#40319)
wirthual Aug 21, 2025
c7e6f9a
Fix an infinite loop bug in recursive search of relative imports (#40…
eladsegal Aug 21, 2025
c4513a9
Fix links in Glm4vMoe configuration classes to point to the correct H…
vvvdwbvvv Aug 21, 2025
11a49dd
T5 test and target device fixes (#40313)
ahadnagy Aug 21, 2025
7f2f534
Update `test_spm_converter_bytefallback_warning` (#40284)
ydshieh Aug 21, 2025
1e1db12
(small) fix conditional for input_ids and input_embeds in marian (#40…
Aug 21, 2025
04b751f
Fix attention vizualizer (#40285)
molbap Aug 21, 2025
75aa7c7
[ModernBert] Prevent the attention mask from being None in ModernBert…
ashmikuz Aug 21, 2025
b40b834
Clean up XCodec and other codecs (#40348)
ebezzam Aug 21, 2025
2121d09
[serve] add cors warnings (#40112)
gante Aug 21, 2025
128f42d
[detection] use consistent dtype for Conditional and DAB DETR positio…
agkphysics Aug 21, 2025
f46f29d
Remove more PyTorch 2.2 compatible code (#40337)
cyyever Aug 21, 2025
cb1df4d
[`FA`] Fix some model tests (#40350)
vasqu Aug 21, 2025
7f38068
Qwen2.5-VL test fixes for ROCm (#40308)
ahadnagy Aug 21, 2025
9568b50
[generate] handle support for cache classes when num enc layers != nu…
gante Aug 21, 2025
7c1169e
[4/N]more docs to device agnostic (#40355)
yao-matrix Aug 21, 2025
8365f70
DOCS: Clarification on the use of `label_names` as an argument to Tra…
huzaifa-jawad367 Aug 22, 2025
cf487cd
HunYuan opensource (#39606)
yjc9696 Aug 22, 2025
d7fe311
Fix idefics3 vision embeddings indices dtype (#40360)
Isotr0py Aug 22, 2025
e018b77
wav2vec2 fixes (#40341)
remi-or Aug 22, 2025
5c40e7a
Change multimodal data links to HF hub (#40309)
zucchini-nlp Aug 22, 2025
9c25820
[pipelines] add support to `skip_special_tokens` in the main text gen…
gante Aug 22, 2025
d8f6d37
⚠️⚠️ Use `dtype` instead of `torch_dtype` everywhere! (#39782)
Cyrilvallez Aug 22, 2025
19ffe02
[processor] move commonalities to mixin (#40339)
zucchini-nlp Aug 22, 2025
7db228a
[configuration] allow to overwrite kwargs from subconfigs (#40241)
zucchini-nlp Aug 22, 2025
8a6908c
fix(example): align parameter names with the latest function definiti…
developer0hye Aug 22, 2025
56d68c6
Addiing ByteDance Seed Seed-OSS (#40272)
Fazziekey Aug 22, 2025
894b2d8
Add GptOssForTokenClassification for GPT-OSS models (#40190)
abdokaseb Aug 22, 2025
0a21e87
Bug Fix: Dynamically set return_lse flag in FlexAttention (#40352)
amd-lalithnc Aug 22, 2025
dab66f1
Chat Template Doc Fixes (#40173)
Rocketknight1 Aug 22, 2025
29ddcac
Rework the Cache documentation (#40373)
Cyrilvallez Aug 22, 2025
7d88f57
Update README_zh-hans.md (#40380)
TardC Aug 22, 2025
28ca27c
HF papers in doc (#40381)
qgallouedec Aug 22, 2025
4f9b4e6
Run FA2 tests in CI (#40397)
ydshieh Aug 23, 2025
2c55c7f
Reactivate a lot of tests skipped for no reason anymore (#40378)
Cyrilvallez Aug 25, 2025
ba095d3
:broom: :broom: :broom: Get set decoder cleanup (#39509)
molbap Aug 25, 2025
14b89fe
fix to accept cumulative_seqlens from TransformersKwargs in FA (#40194)
Kurt232 Aug 25, 2025
0031c04
[docs] flax/jax purge (#40372)
gante Aug 25, 2025
a2b37bf
Fix typo: 'casual' -> 'causal' in code and documentation (#40371) (#4…
akintunero Aug 25, 2025
4029913
Fix CI (hunyuan moe does not support fullgraph) (#40423)
Cyrilvallez Aug 25, 2025
11e12a7
Fix typo: 'seperator' to 'separator' in variable names (#40389)
Prawal-Sharma Aug 25, 2025
d73181b
Fix UnboundLocalError in WER metric computation (#40402)
prxshetty Aug 25, 2025
a0a37b3
Gpt oss optim (#40304)
jiqing-feng Aug 25, 2025
3b5b9f6
Fix processing tests (#40379)
zucchini-nlp Aug 25, 2025
04c2bae
Fix label smoothing incompatibility with multi-label classification (…
avchauzov Aug 25, 2025
6bf6f84
[`Mxfp4`] Add a way to save with a quantization method (#40176)
ArthurZucker Aug 25, 2025
ea8d9c8
🚨 Remove DoLa decoding strategy (#40082)
manueldeprada Aug 25, 2025
399cd5c
Fix modular for modernbert-decoder (#40431)
Cyrilvallez Aug 25, 2025
1a35d07
Update collated reports working directory and --path (#40433)
ivarflakstad Aug 25, 2025
d8f2edc
Add `tokenizer_kwargs` argument to the text generation pipeline (#40…
Joshua-Chin Aug 25, 2025
eac4f00
Fix typo and improve GPU kernel check error message in MXFP4 quantiza…
akintunero Aug 25, 2025
1763ef2
[docs] remove last references to `transformers` TF classes/methods (#…
gante Aug 25, 2025
6b5eab7
Remove working-dir from collated reports job (#40435)
ivarflakstad Aug 25, 2025
c81723d
🌐 [i18n-KO] Translated `models.md` to Korean (#39518)
Judy-Choi Aug 25, 2025
ef40690
Gemma3 text fixes: Add expectations for MI325 (#40384)
ahadnagy Aug 25, 2025
f0e87b4
Fix collated reports model directory traversal (#40437)
ivarflakstad Aug 25, 2025
fa59cf9
Fix https://github.com/huggingface/transformers/issues/40292 (#40439)
id01 Aug 25, 2025
7637d29
Fix collated reports uploading (#40440)
ivarflakstad Aug 25, 2025
8ce633c
InternVL MI325 test expectations (#40387)
ahadnagy Aug 25, 2025
e68146f
Fix collated reports model name entry (#40441)
ivarflakstad Aug 25, 2025
922e65b
Fix non FA2 tests after FA2 installed in CI docker image (#40430)
ydshieh Aug 26, 2025
63caaea
Refactor ViT-like models (#39816)
qubvel Aug 26, 2025
6d2bb1e
[Trainer] accelerate contextparallel support in trainer (#40205)
kashif Aug 26, 2025
64ae6e6
fix qwen25-vl grad acc (#40333)
iMountTai Aug 26, 2025
f690a2a
[video processors] decode only sampled videos -> less RAM and faster …
zucchini-nlp Aug 26, 2025
32fcc24
rename get_cuda_warm_up_factor to get_accelerator_warm_up_factor (#40…
yao-matrix Aug 26, 2025
b8184b7
Make cache_config not mandatory (#40316)
remi-or Aug 26, 2025
49e168f
🚨 Remove Contrastive Search decoding strategy (#40428)
manueldeprada Aug 26, 2025
34108a2
Continuous batching refactor (#40426)
remi-or Aug 26, 2025
58cebc8
flash_paged: s_aux may not exist (#40434)
pcuenca Aug 26, 2025
263d06f
Fix extra template loading (#40455)
Rocketknight1 Aug 26, 2025
0ce6709
deci gguf support (#38669)
ved1beta Aug 26, 2025
5a8ba87
[fast_image_processor] fix image normalization for resize (#40436)
audioXD Aug 26, 2025
6451294
[RoPE] explicit factor > implicit factor in YaRN (#40320)
gante Aug 26, 2025
78f32c3
[pipeline] Add Keypoint Matching pipeline (#39970)
sbucaille Aug 26, 2025
c8c7623
Update SegFormer model card (#40417)
GSNCodes Aug 26, 2025
74ad608
Not to shock AMD team by the cancelled workflow run notification ❤️ 💖…
ydshieh Aug 26, 2025
ff8b88a
Fix nightly torch CI (#40469)
ydshieh Aug 26, 2025
bb90351
O26 sync (#1)
Guo-Chenxu Aug 27, 2025
80f4c0c
CI when PR merged to `main` (#40451)
ydshieh Aug 27, 2025
75d6f17
Validate GptOssConfig rope config after it's fully initialized (#40474)
zifeitong Aug 27, 2025
a3afebb
[modular] Use multi-processing + fix model import issue (#40481)
Cyrilvallez Aug 27, 2025
8b80431
[modular] Remove ambiguity in all calls to parent class methods + fix…
Cyrilvallez Aug 27, 2025
ed5dd29
[ESM] support attention API (#40370)
zucchini-nlp Aug 27, 2025
52aaa3f
[EfficientLoFTR] dynamic image size support (#40329)
sbucaille Aug 27, 2025
6350636
Fix `qwen2_moe` tests (#40494)
ydshieh Aug 27, 2025
3c343c6
[Whisper] Add rocm expected results to certain tests (#40482)
ivarflakstad Aug 27, 2025
304225a
Collated reports: no need to upload artifact (#40502)
ivarflakstad Aug 27, 2025
821384d
Fix the CI workflow of `merge to main` (#40503)
ydshieh Aug 27, 2025
e3d8fd7
docs(pixtral): Update Pixtral model card to new format (#40442)
BryanBradfo Aug 27, 2025
98289c5
[modular] Classes can now be defined and referenced in arbitrary orde…
Cyrilvallez Aug 27, 2025
cf2a81c
Merge branch 'main' into minicpm_o_2_6
Guo-Chenxu Aug 28, 2025
05a2d72
ruff format
Guo-Chenxu Aug 28, 2025
15 changes: 8 additions & 7 deletions .circleci/create_circleci_config.py
@@ -109,7 +109,9 @@ def __post_init__(self):
self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev"
print(f"Using {self.docker_image} docker image")
if self.install_steps is None:
self.install_steps = ["uv venv && uv pip install ."]
self.install_steps = ["uv pip install ."]
# Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded`
self.install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh")
if self.pytest_options is None:
self.pytest_options = {}
if isinstance(self.tests_to_run, str):
@@ -213,7 +215,7 @@ def job_name(self):
docker_image=[{"image": "huggingface/transformers-torch-light"}],
# networkx==3.3 (after #36957) cause some issues
# TODO: remove this once it works directly
install_steps=["uv venv && uv pip install ."],
install_steps=["uv pip install ."],
marker="generate",
parallelism=6,
)
@@ -250,7 +252,7 @@ def job_name(self):
additional_env={"OMP_NUM_THREADS": 8},
docker_image=[{"image":"huggingface/transformers-examples-torch"}],
# TODO @ArthurZucker remove this once docker is easier to build
install_steps=["uv venv && uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
install_steps=["uv pip install . && uv pip install -r examples/pytorch/_tests_requirements.txt"],
pytest_num_workers=4,
)

@@ -259,7 +261,7 @@ def job_name(self):
additional_env={"HUGGINGFACE_CO_STAGING": True},
docker_image=[{"image":"huggingface/transformers-torch-light"}],
install_steps=[
'uv venv && uv pip install .',
'uv pip install .',
'git config --global user.email "ci@dummy.com"',
'git config --global user.name "ci"',
],
@@ -273,7 +275,6 @@ def job_name(self):
"onnx",
docker_image=[{"image":"huggingface/transformers-torch-tf-light"}],
install_steps=[
"uv venv",
"uv pip install .[testing,sentencepiece,onnxruntime,vision,rjieba]",
],
pytest_options={"k onnx": None},
@@ -303,7 +304,7 @@ def job_name(self):
docker_image=[{"image": "huggingface/transformers-torch-light"}],
# networkx==3.3 (after #36957) cause some issues
# TODO: remove this once it works directly
install_steps=["uv venv && uv pip install ."],
install_steps=["uv pip install .[serving]"],
marker="not generate",
parallelism=6,
)
@@ -321,7 +322,7 @@ def job_name(self):
additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"},
install_steps=[
# Add an empty file to keep the test step running correctly even no file is selected to be tested.
"uv venv && pip install .",
"uv pip install .",
"touch dummy.py",
command,
"cat pr_documentation_tests_temp.txt",
2 changes: 1 addition & 1 deletion .github/workflows/benchmark.yml
@@ -48,7 +48,7 @@ jobs:

- name: Run database init script
run: |
psql -f benchmark/init_db.sql
psql -f benchmark/utils/init_db.sql
env:
PGDATABASE: metrics
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
7 changes: 5 additions & 2 deletions .github/workflows/check_failed_tests.yml
@@ -21,6 +21,9 @@ on:
report_repo_id:
required: true
type: string
commit_sha:
required: false
type: string


env:
@@ -41,7 +44,7 @@ jobs:
check_new_failures:
name: " "
runs-on:
group: aws-g4dn-4xlarge-cache
group: aws-g5-4xlarge-cache
container:
image: ${{ inputs.docker }}
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
@@ -87,7 +90,7 @@ jobs:
- name: Update clone
working-directory: /transformers
if: ${{ env.process == 'true' }}
run: git fetch && git checkout ${{ github.sha }}
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}

- name: Get target commit
working-directory: /transformers/utils
43 changes: 43 additions & 0 deletions .github/workflows/collated-reports.yml
@@ -0,0 +1,43 @@
name: CI collated reports

on:
workflow_call:
inputs:
job:
required: true
type: string
report_repo_id:
required: true
type: string
machine_type:
required: true
type: string
gpu_name:
description: Name of the GPU used for the job. Its enough that the value contains the name of the GPU, e.g. "noise-h100-more-noise". Case insensitive.
required: true
type: string

jobs:
collated_reports:
name: Collated reports
runs-on: ubuntu-22.04
if: always()
steps:
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4

- name: Collated reports
shell: bash
env:
ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
CI_SHA: ${{ github.sha }}
TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN: ${{ secrets.TRANSFORMERS_CI_RESULTS_UPLOAD_TOKEN }}
run: |
pip install huggingface_hub
python3 utils/collated_reports.py \
--path . \
--machine-type ${{ inputs.machine_type }} \
--commit-hash ${{ env.CI_SHA }} \
--job ${{ inputs.job }} \
--report-repo-id ${{ inputs.report_repo_id }} \
--gpu-name ${{ inputs.gpu_name }}
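
For reference, a caller job could wire this reusable workflow in roughly as follows; this is a minimal sketch and not part of the diff, with the upstream job name, dataset id, and GPU name used as illustrative placeholders:

jobs:
  collated_reports:
    if: always()
    needs: [run_models_gpu]          # hypothetical upstream test job
    uses: ./.github/workflows/collated-reports.yml
    with:
      job: run_models_gpu
      report_repo_id: <org>/<reports-dataset>   # placeholder dataset id
      machine_type: aws-g5-4xlarge-cache
      gpu_name: a10                  # value only needs to contain the GPU name
    secrets: inherit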
4 changes: 2 additions & 2 deletions .github/workflows/doctest_job.yml
@@ -28,10 +28,10 @@ jobs:
matrix:
split_keys: ${{ fromJson(inputs.split_keys) }}
runs-on:
group: aws-g4dn-4xlarge-cache
group: aws-g5-4xlarge-cache
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
steps:
- name: Update clone
working-directory: /transformers
4 changes: 2 additions & 2 deletions .github/workflows/doctests.yml
@@ -15,10 +15,10 @@ jobs:
setup:
name: Setup
runs-on:
group: aws-g4dn-4xlarge-cache
group: aws-g5-4xlarge-cache
container:
image: huggingface/transformers-all-latest-gpu
options: --gpus 0 --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
outputs:
job_splits: ${{ steps.set-matrix.outputs.job_splits }}
split_keys: ${{ steps.set-matrix.outputs.split_keys }}
157 changes: 157 additions & 0 deletions .github/workflows/get-pr-info.yml
@@ -0,0 +1,157 @@
name: Get PR commit SHA
on:
workflow_call:
inputs:
pr_number:
required: true
type: string
outputs:
PR_HEAD_REPO_FULL_NAME:
description: "The full name of the repository from which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_FULL_NAME }}
PR_BASE_REPO_FULL_NAME:
description: "The full name of the repository to which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_FULL_NAME }}
PR_HEAD_REPO_OWNER:
description: "The owner of the repository from which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_OWNER }}
PR_BASE_REPO_OWNER:
description: "The owner of the repository to which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_OWNER }}
PR_HEAD_REPO_NAME:
description: "The name of the repository from which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REPO_NAME }}
PR_BASE_REPO_NAME:
description: "The name of the repository to which the pull request is created"
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REPO_NAME }}
PR_HEAD_REF:
description: "The branch name of the pull request in the head repository"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_REF }}
PR_BASE_REF:
description: "The branch name in the base repository (to merge into)"
value: ${{ jobs.get-pr-info.outputs.PR_BASE_REF }}
PR_HEAD_SHA:
description: "The head sha of the pull request branch in the head repository"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_SHA }}
PR_BASE_SHA:
description: "The head sha of the target branch in the base repository"
value: ${{ jobs.get-pr-info.outputs.PR_BASE_SHA }}
PR_MERGE_COMMIT_SHA:
description: "The sha of the merge commit for the pull request (created by GitHub) in the base repository"
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}
PR_HEAD_COMMIT_DATE:
description: "The date of the head sha of the pull request branch in the head repository"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_DATE }}
PR_MERGE_COMMIT_DATE:
description: "The date of the merge commit for the pull request (created by GitHub) in the base repository"
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_DATE }}
PR_HEAD_COMMIT_TIMESTAMP:
description: "The timestamp of the head sha of the pull request branch in the head repository"
value: ${{ jobs.get-pr-info.outputs.PR_HEAD_COMMIT_TIMESTAMP }}
PR_MERGE_COMMIT_TIMESTAMP:
description: "The timestamp of the merge commit for the pull request (created by GitHub) in the base repository"
value: ${{ jobs.get-pr-info.outputs.PR_MERGE_COMMIT_TIMESTAMP }}
PR:
description: "The PR"
value: ${{ jobs.get-pr-info.outputs.PR }}
PR_FILES:
description: "The files touched in the PR"
value: ${{ jobs.get-pr-info.outputs.PR_FILES }}


jobs:
get-pr-info:
runs-on: ubuntu-22.04
name: Get PR commit SHA better
outputs:
PR_HEAD_REPO_FULL_NAME: ${{ steps.pr_info.outputs.head_repo_full_name }}
PR_BASE_REPO_FULL_NAME: ${{ steps.pr_info.outputs.base_repo_full_name }}
PR_HEAD_REPO_OWNER: ${{ steps.pr_info.outputs.head_repo_owner }}
PR_BASE_REPO_OWNER: ${{ steps.pr_info.outputs.base_repo_owner }}
PR_HEAD_REPO_NAME: ${{ steps.pr_info.outputs.head_repo_name }}
PR_BASE_REPO_NAME: ${{ steps.pr_info.outputs.base_repo_name }}
PR_HEAD_REF: ${{ steps.pr_info.outputs.head_ref }}
PR_BASE_REF: ${{ steps.pr_info.outputs.base_ref }}
PR_HEAD_SHA: ${{ steps.pr_info.outputs.head_sha }}
PR_BASE_SHA: ${{ steps.pr_info.outputs.base_sha }}
PR_MERGE_COMMIT_SHA: ${{ steps.pr_info.outputs.merge_commit_sha }}
PR_HEAD_COMMIT_DATE: ${{ steps.pr_info.outputs.head_commit_date }}
PR_MERGE_COMMIT_DATE: ${{ steps.pr_info.outputs.merge_commit_date }}
PR_HEAD_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.head_commit_timestamp }}
PR_MERGE_COMMIT_TIMESTAMP: ${{ steps.get_timestamps.outputs.merge_commit_timestamp }}
PR: ${{ steps.pr_info.outputs.pr }}
PR_FILES: ${{ steps.pr_info.outputs.files }}
if: ${{ inputs.pr_number != '' }}
steps:
- name: Extract PR details
id: pr_info
uses: actions/github-script@v6
with:
script: |
const { data: pr } = await github.rest.pulls.get({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: ${{ inputs.pr_number }}
});

const { data: head_commit } = await github.rest.repos.getCommit({
owner: pr.head.repo.owner.login,
repo: pr.head.repo.name,
ref: pr.head.ref
});

const { data: merge_commit } = await github.rest.repos.getCommit({
owner: pr.base.repo.owner.login,
repo: pr.base.repo.name,
ref: pr.merge_commit_sha,
});

const { data: files } = await github.rest.pulls.listFiles({
owner: context.repo.owner,
repo: context.repo.repo,
pull_number: ${{ inputs.pr_number }}
});

core.setOutput('head_repo_full_name', pr.head.repo.full_name);
core.setOutput('base_repo_full_name', pr.base.repo.full_name);
core.setOutput('head_repo_owner', pr.head.repo.owner.login);
core.setOutput('base_repo_owner', pr.base.repo.owner.login);
core.setOutput('head_repo_name', pr.head.repo.name);
core.setOutput('base_repo_name', pr.base.repo.name);
core.setOutput('head_ref', pr.head.ref);
core.setOutput('base_ref', pr.base.ref);
core.setOutput('head_sha', pr.head.sha);
core.setOutput('base_sha', pr.base.sha);
core.setOutput('merge_commit_sha', pr.merge_commit_sha);
core.setOutput('pr', pr);

core.setOutput('head_commit_date', head_commit.commit.committer.date);
core.setOutput('merge_commit_date', merge_commit.commit.committer.date);

core.setOutput('files', files);

console.log('PR head commit:', {
head_commit: head_commit,
commit: head_commit.commit,
date: head_commit.commit.committer.date
});

console.log('PR merge commit:', {
merge_commit: merge_commit,
commit: merge_commit.commit,
date: merge_commit.commit.committer.date
});

- name: Convert dates to timestamps
id: get_timestamps
run: |
head_commit_date=${{ steps.pr_info.outputs.head_commit_date }}
merge_commit_date=${{ steps.pr_info.outputs.merge_commit_date }}
echo $head_commit_date
echo $merge_commit_date
head_commit_timestamp=$(date -d "$head_commit_date" +%s)
merge_commit_timestamp=$(date -d "$merge_commit_date" +%s)
echo $head_commit_timestamp
echo $merge_commit_timestamp
echo "head_commit_timestamp=$head_commit_timestamp" >> $GITHUB_OUTPUT
echo "merge_commit_timestamp=$merge_commit_timestamp" >> $GITHUB_OUTPUT
36 changes: 36 additions & 0 deletions .github/workflows/get-pr-number.yml
@@ -0,0 +1,36 @@
name: Get PR number
on:
workflow_call:
outputs:
PR_NUMBER:
description: "The extracted PR number"
value: ${{ jobs.get-pr-number.outputs.PR_NUMBER }}

jobs:
get-pr-number:
runs-on: ubuntu-22.04
name: Get PR number
outputs:
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }}
steps:
- name: Get PR number
shell: bash
run: |
if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then
echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV
elif [[ "${{ github.event.pull_request.number }}" != "" ]]; then
echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
elif [[ "${{ github.event.pull_request }}" != "" ]]; then
echo "PR_NUMBER=${{ github.event.number }}" >> $GITHUB_ENV
else
echo "PR_NUMBER=" >> $GITHUB_ENV
fi

- name: Check PR number
shell: bash
run: |
echo "${{ env.PR_NUMBER }}"

- name: Set PR number
id: set_pr_number
run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT"
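
Taken together with get-pr-info.yml above, a caller workflow might chain the two roughly as follows (a minimal sketch assuming both files are used from the same repository; job names are illustrative):

jobs:
  get-pr-number:
    uses: ./.github/workflows/get-pr-number.yml

  get-pr-info:
    needs: get-pr-number
    if: ${{ needs.get-pr-number.outputs.PR_NUMBER != '' }}
    uses: ./.github/workflows/get-pr-info.yml
    with:
      pr_number: ${{ needs.get-pr-number.outputs.PR_NUMBER }}

  # A downstream job that needs get-pr-info can then check out the exact
  # merge commit, e.g. via the commit_sha input added to model_jobs.yml below:
  # git fetch && git checkout ${{ needs.get-pr-info.outputs.PR_MERGE_COMMIT_SHA }}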
9 changes: 6 additions & 3 deletions .github/workflows/model_jobs.yml
@@ -18,6 +18,9 @@ on:
docker:
required: true
type: string
commit_sha:
required: false
type: string
report_name_prefix:
required: false
default: run_models_gpu
@@ -70,7 +73,7 @@ jobs:

- name: Update clone
working-directory: /transformers
run: git fetch && git checkout ${{ github.sha }}
run: git fetch && git checkout ${{ inputs.commit_sha || github.sha }}

- name: Reinstall transformers in edit mode (remove the one installed during docker image build)
working-directory: /transformers
@@ -107,9 +110,9 @@ jobs:
run: |
echo "${{ inputs.machine_type }}"

if [ "${{ inputs.machine_type }}" = "aws-g4dn-4xlarge-cache" ]; then
if [ "${{ inputs.machine_type }}" = "aws-g5-4xlarge-cache" ]; then
machine_type=single-gpu
elif [ "${{ inputs.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then
elif [ "${{ inputs.machine_type }}" = "aws-g5-12xlarge-cache" ]; then
machine_type=multi-gpu
else
machine_type=${{ inputs.machine_type }}