huggingface
diff --git a/‎.circleci/config.yml‎
Lines changed: 9 additions & 3 deletions b/‎.circleci/config.yml‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎.circleci/create_circleci_config.py‎
Lines changed: 24 additions & 2 deletions b/‎.circleci/create_circleci_config.py‎
Lines changed: 24 additions & 2 deletions
diff --git a/‎.github/ISSUE_TEMPLATE/bug-report.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/ISSUE_TEMPLATE/bug-report.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/PULL_REQUEST_TEMPLATE.md‎
Lines changed: 3 additions & 3 deletions b/‎.github/PULL_REQUEST_TEMPLATE.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/scripts/assign_reviewers.py‎
Lines changed: 98 additions & 0 deletions b/‎.github/scripts/assign_reviewers.py‎
Lines changed: 98 additions & 0 deletions
@@ -31,6 +31,14 @@ jobs:
         parallelism: 1
         steps:
             - checkout
+            - run: if [[ "$CIRCLE_PULL_REQUEST" == "" && "$CIRCLE_BRANCH" != "main" && "$CIRCLE_BRANCH" != *-release ]]; then echo "Not a PR, not the main branch and not a release branch, skip test!"; circleci-agent step halt; fi
+            - run: 'curl -L -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pulls/${CIRCLE_PULL_REQUEST##*/} >> github.txt'
+            - run: cat github.txt
+            - run: (python3 -c 'import json; from datetime import datetime; fp = open("github.txt"); data = json.load(fp); fp.close(); f = "%Y-%m-%dT%H:%M:%SZ"; created = datetime.strptime(data["created_at"], f); updated = datetime.strptime(data["updated_at"], f); s = (updated - created).total_seconds(); print(int(s))' || true) > elapsed.txt
+            - run: if [ "$(cat elapsed.txt)" == "" ]; then echo 60 > elapsed.txt; fi
+            - run: cat elapsed.txt
+            - run: if [ "$(cat elapsed.txt)" -lt "30" ]; then echo "PR is just opened, wait some actions from GitHub"; sleep 30; fi
+            - run: 'if grep -q "\"draft\": true," github.txt; then echo "draft mode, skip test!"; circleci-agent step halt; fi'
             - run: uv pip install -U -e .
             - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
             - run: mkdir -p test_preparation
@@ -170,8 +178,7 @@ jobs:
             - store_artifacts:
                   path: ~/transformers/installed.txt
             - run: python utils/check_copies.py
-            - run: python utils/check_modular_conversion.py --num_workers 4
-            - run: python utils/check_table.py
+            - run: python utils/check_modular_conversion.py
             - run: python utils/check_dummies.py
             - run: python utils/check_repo.py
             - run: python utils/check_inits.py
@@ -181,7 +188,6 @@ jobs:
             - run: make deps_table_check_updated
             - run: python utils/update_metadata.py --check-only
             - run: python utils/check_docstrings.py
-            - run: python utils/check_support_list.py
 
 workflows:
     version: 2
 
@@ -33,6 +33,26 @@
 COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsfE":None}
 DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]
 
+# Regex fragments that commonly appear in the output of flaky tests when they fail. `to_dict` joins them with "|" and
+# passes the result to the `--only-rerun` option of `pytest-rerunfailures`, so only failures matching one are rerun.
+FLAKY_TEST_FAILURE_PATTERNS = [
+    "OSError",  # Machine/connection transient error
+    "Timeout",  # Machine/connection transient error
+    "ConnectionError",  # Connection transient error
+    "FileNotFoundError",  # Raised by `datasets` on Hub failures
+    "PIL.UnidentifiedImageError",  # Raised by `PIL.Image.open` on connection issues
+    "HTTPError.*502",  # Hub-related
+    "HTTPError.*504",  # Hub-related
+    "AssertionError: Tensor-likes are not close!",  # `torch.testing.assert_close`, we might have unlucky random values
+    # TODO: an error downloading the tokenizer's `merges.txt` (the original note said `merged.txt`) from the Hub can
+    # cause all the exceptions below. Throw and handle them under a single message.
+    "TypeError: expected str, bytes or os.PathLike object, not NoneType",
+    "TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType",
+    "Converting from Tiktoken failed",
+    "KeyError: <class ",
+    "TypeError: not a string",
+]
+
 
 class EmptyJob:
     job_name = "empty"
@@ -124,7 +144,9 @@ def to_dict(self):
                 # Examples special case: we need to download NLTK files in advance to avoid cuncurrency issues
         timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
         marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
-        additional_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
+        junit_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
+        joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS)
+        repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'"
         parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
         steps = [
             "checkout",
@@ -152,7 +174,7 @@ def to_dict(self):
             },
             {"run": {
                 "name": "Run tests",
-                "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
+                "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
             },
             {"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
             {"run": {"name": "Failed tests: show reasons",   "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},
 
@@ -38,12 +38,12 @@ body:
 
           - text models: @ArthurZucker
           - vision models: @amyeroberts, @qubvel
-          - speech models: @ylacombe, @eustlb
+          - speech models: @eustlb
           - graph models: @clefourrier
 
         Library:
 
-          - flax: @sanchit-gandhi
+          - flax: @gante and @Rocketknight1
           - generate: @zucchini-nlp (visual-language models) or @gante (all others)
           - pipelines: @Rocketknight1
           - tensorflow: @gante and @Rocketknight1
@@ -72,7 +72,7 @@ body:
 
         Maintained examples (not research project or legacy):
 
-          - Flax: @sanchit-gandhi
+          - Flax: @Rocketknight1
           - PyTorch: See Models above and tag the person corresponding to the modality of the example.
           - TensorFlow: @Rocketknight1
 
 
@@ -41,12 +41,12 @@ Models:
 
 - text models: @ArthurZucker
 - vision models: @amyeroberts, @qubvel
-- speech models: @ylacombe, @eustlb
+- speech models: @eustlb
 - graph models: @clefourrier
 
 Library:
 
-- flax: @sanchit-gandhi
+- flax: @gante and @Rocketknight1
 - generate: @zucchini-nlp (visual-language models) or @gante (all others)
 - pipelines: @Rocketknight1
 - tensorflow: @gante and @Rocketknight1
@@ -72,7 +72,7 @@ HF projects:
 
 Maintained examples (not research project or legacy):
 
-- Flax: @sanchit-gandhi
+- Flax: @Rocketknight1
 - PyTorch: See Models above and tag the person corresponding to the modality of the example.
 - TensorFlow: @Rocketknight1
 
 
@@ -0,0 +1,98 @@
+# coding=utf-8
+# Copyright 2025 the HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import github
+import json
+from github import Github
+import re
+from collections import Counter
+from pathlib import Path
+
+def pattern_to_regex(pattern):  # CODEOWNERS-style glob (str) -> regex source (str) meant for `re.search` on a file path
+    start_anchor = pattern.startswith("/")  # A leading "/" anchors the pattern at the start of the path
+    pattern = re.escape(pattern.removeprefix("/"))  # Drop that "/" so repo-relative paths ("src/x.py") can still match
+    # Replace `*` (escaped by `re.escape` above) with "any number of non-slash characters"
+    pattern = pattern.replace(r"\*", "[^/]*")
+    if start_anchor:
+        pattern = "^/?" + pattern  # Anchor at the start, tolerating an optional leading slash in the path itself
+    return pattern  # No end anchor is added; unanchored patterns may match anywhere inside the path
+
+def get_file_owners(file_path, codeowners_lines):  # Return the owner names (list of str) for `file_path`, possibly empty
+    # Walk the lines bottom-up so that the last matching pattern in the file takes precedence
+    for line in reversed(codeowners_lines):
+        # Drop everything from the first `#` on (full-line and inline comments), then skip lines left empty
+        line = line.split('#')[0].strip()
+        if not line:
+            continue
+
+        # First whitespace-separated token is the path pattern, the remaining tokens are the owners
+        parts = line.split()
+        pattern = parts[0]
+        # Can be empty, e.g. for dummy files with explicitly no owner! The leading "@" of each owner is removed.
+        owners = [owner.removeprefix("@") for owner in parts[1:]]
+
+        # Check if the file path matches the pattern (regex search, see `pattern_to_regex`)
+        file_regex = pattern_to_regex(pattern)
+        if re.search(file_regex, file_path) is not None:
+            return owners  # First hit in reverse order wins; remember, can still be empty!
+    return []  # No line matched at all: report no owners
+
+def main():  # Request reviews on the triggering PR from up to two owners of the files it changes
+    script_dir = Path(__file__).parent.absolute()
+    with open(script_dir / "codeowners_for_review_action") as f:  # Ownership rules live next to this script
+        codeowners_lines = f.readlines()
+
+    g = Github(os.environ['GITHUB_TOKEN'])  # Raises KeyError if the token is not set in the environment
+    repo = g.get_repo("huggingface/transformers")
+    with open(os.environ['GITHUB_EVENT_PATH']) as f:  # JSON payload of the event that triggered the run
+        event = json.load(f)
+
+    # The PR number is available in the event payload
+    pr_number = event['pull_request']['number']
+    pr = repo.get_pull(pr_number)
+    pr_author = pr.user.login
+
+    existing_reviews = list(pr.get_reviews())
+    if existing_reviews:  # Someone has already reviewed: do not request anyone else
+        print(f"Already has reviews: {[r.user.login for r in existing_reviews]}")
+        return
+
+    users_requested, teams_requested = pr.get_review_requests()  # NOTE(review): team requests are never checked below
+    users_requested = list(users_requested)
+    if users_requested:  # Individual reviewers were already requested: leave the PR as it is
+        print(f"Reviewers already requested: {users_requested}")
+        return
+
+    locs_per_owner = Counter()  # owner name -> sum of `file.changes` over the files they own
+    for file in pr.get_files():
+        owners = get_file_owners(file.filename, codeowners_lines)
+        for owner in owners:
+            locs_per_owner[owner] += file.changes  # presumably the number of changed lines - confirm against PyGithub
+
+    # Assign the top 2 owners by changed lines as reviewers, but skip the PR author if present
+    locs_per_owner.pop(pr_author, None)
+    top_owners = locs_per_owner.most_common(2)
+    print("Top owners", top_owners)
+    top_owners = [owner[0] for owner in top_owners]  # Keep only the names, dropping the counts
+    try:
+        pr.create_review_request(top_owners)  # NOTE(review): also called when `top_owners` is empty
+    except github.GithubException as e:  # Best effort: log the failure instead of failing the run
+        print(f"Failed to request review for {top_owners}: {e}")
+
+
+
+if __name__ == "__main__":  # Only run when executed as a script, not when imported
+    main()