Skip to content

Commit 24f1be4

Browse files
authored
Merge branch 'main' into ds_tp_upstream2
2 parents 93591f4 + 69bc848 commit 24f1be4

File tree

1,269 files changed

+26876
-83058
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,269 files changed

+26876
-83058
lines changed

.circleci/config.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ jobs:
3131
parallelism: 1
3232
steps:
3333
- checkout
34+
- run: if [[ "$CIRCLE_PULL_REQUEST" == "" && "$CIRCLE_BRANCH" != "main" && "$CIRCLE_BRANCH" != *-release ]]; then echo "Not a PR, not the main branch and not a release branch, skip test!"; circleci-agent step halt; fi
35+
- run: 'curl -L -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" https://api.github.com/repos/$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME/pulls/${CIRCLE_PULL_REQUEST##*/} >> github.txt'
36+
- run: cat github.txt
37+
- run: (python3 -c 'import json; from datetime import datetime; fp = open("github.txt"); data = json.load(fp); fp.close(); f = "%Y-%m-%dT%H:%M:%SZ"; created = datetime.strptime(data["created_at"], f); updated = datetime.strptime(data["updated_at"], f); s = (updated - created).total_seconds(); print(int(s))' || true) > elapsed.txt
38+
- run: if [ "$(cat elapsed.txt)" == "" ]; then echo 60 > elapsed.txt; fi
39+
- run: cat elapsed.txt
40+
- run: if [ "$(cat elapsed.txt)" -lt "30" ]; then echo "PR is just opened, wait some actions from GitHub"; sleep 30; fi
41+
- run: 'if grep -q "\"draft\": true," github.txt; then echo "draft mode, skip test!"; circleci-agent step halt; fi'
3442
- run: uv pip install -U -e .
3543
- run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
3644
- run: mkdir -p test_preparation
@@ -170,8 +178,7 @@ jobs:
170178
- store_artifacts:
171179
path: ~/transformers/installed.txt
172180
- run: python utils/check_copies.py
173-
- run: python utils/check_modular_conversion.py --num_workers 4
174-
- run: python utils/check_table.py
181+
- run: python utils/check_modular_conversion.py
175182
- run: python utils/check_dummies.py
176183
- run: python utils/check_repo.py
177184
- run: python utils/check_inits.py
@@ -181,7 +188,6 @@ jobs:
181188
- run: make deps_table_check_updated
182189
- run: python utils/update_metadata.py --check-only
183190
- run: python utils/check_docstrings.py
184-
- run: python utils/check_support_list.py
185191

186192
workflows:
187193
version: 2

.circleci/create_circleci_config.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,26 @@
3333
COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "vvv": None, "rsfE":None}
3434
DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}]
3535

36+
# Strings that commonly appear in the output of flaky tests when they fail. These are used with `pytest-rerunfailures`
37+
# to rerun the tests that match these patterns.
38+
FLAKY_TEST_FAILURE_PATTERNS = [
39+
"OSError", # Machine/connection transient error
40+
"Timeout", # Machine/connection transient error
41+
"ConnectionError", # Connection transient error
42+
"FileNotFoundError", # Raised by `datasets` on Hub failures
43+
"PIL.UnidentifiedImageError", # Raised by `PIL.Image.open` on connection issues
44+
"HTTPError.*502", # Hub-related
45+
"HTTPError.*504", # Hub-related
46+
"AssertionError: Tensor-likes are not close!", # `torch.testing.assert_close`, we might have unlucky random values
47+
# TODO: error downloading tokenizer's `merges.txt` from hub can cause all the exceptions below. Throw and handle
48+
# them under a single message.
49+
"TypeError: expected str, bytes or os.PathLike object, not NoneType",
50+
"TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType",
51+
"Converting from Tiktoken failed",
52+
"KeyError: <class ",
53+
"TypeError: not a string",
54+
]
55+
3656

3757
class EmptyJob:
3858
job_name = "empty"
@@ -124,7 +144,9 @@ def to_dict(self):
124144
# Examples special case: we need to download NLTK files in advance to avoid concurrency issues
125145
timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else ""
126146
marker_cmd = f"-m '{self.marker}'" if self.marker is not None else ""
127-
additional_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
147+
junit_flags = f" -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml"
148+
joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS)
149+
repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 --only-rerun '({joined_flaky_patterns})'"
128150
parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> '
129151
steps = [
130152
"checkout",
@@ -152,7 +174,7 @@ def to_dict(self):
152174
},
153175
{"run": {
154176
"name": "Run tests",
155-
"command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {additional_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
177+
"command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"}
156178
},
157179
{"run": {"name": "Expand to show skipped tests", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}},
158180
{"run": {"name": "Failed tests: show reasons", "when": "always", "command": f"python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}},

.github/ISSUE_TEMPLATE/bug-report.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,12 @@ body:
3838
3939
- text models: @ArthurZucker
4040
- vision models: @amyeroberts, @qubvel
41-
- speech models: @ylacombe, @eustlb
41+
- speech models: @eustlb
4242
- graph models: @clefourrier
4343
4444
Library:
4545
46-
- flax: @sanchit-gandhi
46+
- flax: @gante and @Rocketknight1
4747
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
4848
- pipelines: @Rocketknight1
4949
- tensorflow: @gante and @Rocketknight1
@@ -72,7 +72,7 @@ body:
7272
7373
Maintained examples (not research project or legacy):
7474
75-
- Flax: @sanchit-gandhi
75+
- Flax: @Rocketknight1
7676
- PyTorch: See Models above and tag the person corresponding to the modality of the example.
7777
- TensorFlow: @Rocketknight1
7878

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,12 @@ Models:
4141
4242
- text models: @ArthurZucker
4343
- vision models: @amyeroberts, @qubvel
44-
- speech models: @ylacombe, @eustlb
44+
- speech models: @eustlb
4545
- graph models: @clefourrier
4646
4747
Library:
4848
49-
- flax: @sanchit-gandhi
49+
- flax: @gante and @Rocketknight1
5050
- generate: @zucchini-nlp (visual-language models) or @gante (all others)
5151
- pipelines: @Rocketknight1
5252
- tensorflow: @gante and @Rocketknight1
@@ -72,7 +72,7 @@ HF projects:
7272
7373
Maintained examples (not research project or legacy):
7474
75-
- Flax: @sanchit-gandhi
75+
- Flax: @Rocketknight1
7676
- PyTorch: See Models above and tag the person corresponding to the modality of the example.
7777
- TensorFlow: @Rocketknight1
7878
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# coding=utf-8
2+
# Copyright 2025 the HuggingFace Inc. team. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import os
17+
import github
18+
import json
19+
from github import Github
20+
import re
21+
from collections import Counter
22+
from pathlib import Path
23+
24+
def pattern_to_regex(pattern):
    """Translate a CODEOWNERS-style glob pattern into a regex source string.

    The returned string is meant to be used with `re.search` against a file path.

    Args:
        pattern: The glob pattern. `*` stands for any run of non-slash characters;
            every other character is matched literally. A leading `/` anchors the
            pattern to the start of the path.

    Returns:
        The regex source as a string (not compiled).
    """
    start_anchor = pattern.startswith("/")
    if start_anchor:
        # The leading slash only means "anchor at the root"; it is not part of the
        # path itself. Paths without a leading slash (e.g. "src/foo.py") could never
        # match if the slash were kept as a literal character.
        pattern = pattern[1:]
    pattern = re.escape(pattern)
    # Replace `*` with "any number of non-slash characters"
    pattern = pattern.replace(r"\*", "[^/]*")
    if start_anchor:
        # Accept the path with or without a leading slash.
        pattern = "^/?" + pattern
    return pattern
32+
33+
def get_file_owners(file_path, codeowners_lines):
    """Return the owners listed by the last CODEOWNERS rule matching `file_path`.

    Args:
        file_path: Path of the file to look up.
        codeowners_lines: Lines of the CODEOWNERS-style file, in file order.

    Returns:
        The owner names of the matching rule with any leading `@` removed. The list
        is empty when the matching rule lists no owners, or when no rule matches.
    """
    # Later rules override earlier ones, so walk the file bottom-up and stop at the first hit.
    for raw_line in reversed(codeowners_lines):
        # Everything after `#` is a comment; comment-only and blank lines yield no tokens.
        rule_text, _, _ = raw_line.partition("#")
        tokens = rule_text.split()
        if not tokens:
            continue

        # First token is the path pattern, the rest (possibly none) are the owners.
        glob, *mentions = tokens
        if re.search(pattern_to_regex(glob), file_path) is None:
            continue

        # A rule may deliberately list no owners; the result is then an empty list.
        return [mention.removeprefix("@") for mention in mentions]

    # No rule matched at all.
    return []
52+
53+
def main():
    """Request reviews on the triggering pull request from its most relevant code owners.

    Reads the `codeowners_for_review_action` file next to this script, loads the
    GitHub event payload named by `GITHUB_EVENT_PATH`, and authenticates with
    `GITHUB_TOKEN`. Nothing is requested when the PR already has reviews or
    already has requested user reviewers. Otherwise the changed lines of every PR
    file are credited to that file's owners, the PR author is dropped, and the top
    two owners are requested as reviewers.
    """
    script_dir = Path(__file__).parent.absolute()
    with open(script_dir / "codeowners_for_review_action") as f:
        codeowners_lines = f.readlines()

    g = Github(os.environ['GITHUB_TOKEN'])
    repo = g.get_repo("huggingface/transformers")
    with open(os.environ['GITHUB_EVENT_PATH']) as f:
        event = json.load(f)

    # The PR number is available in the event payload
    pr_number = event['pull_request']['number']
    pr = repo.get_pull(pr_number)
    pr_author = pr.user.login

    existing_reviews = list(pr.get_reviews())
    if existing_reviews:
        print(f"Already has reviews: {[r.user.login for r in existing_reviews]}")
        return

    # Only user review requests are considered; team requests are ignored.
    users_requested, _teams_requested = pr.get_review_requests()
    users_requested = list(users_requested)
    if users_requested:
        print(f"Reviewers already requested: {users_requested}")
        return

    # Weight each owner by the number of changed lines in the files they own.
    locs_per_owner = Counter()
    for file in pr.get_files():
        owners = get_file_owners(file.filename, codeowners_lines)
        for owner in owners:
            locs_per_owner[owner] += file.changes

    # Assign the top 2 based on locs changed as reviewers, but skip the owner if present
    locs_per_owner.pop(pr_author, None)
    top_owners = locs_per_owner.most_common(2)
    print("Top owners", top_owners)
    top_owners = [owner[0] for owner in top_owners]
    if not top_owners:
        # No owner left to ask (no matching rule, or the author is the only owner):
        # skip the API call instead of sending an empty request.
        print("No eligible code owners found, skipping review request")
        return
    try:
        pr.create_review_request(top_owners)
    except github.GithubException as e:
        # Best effort: a failed request is logged but does not fail the script.
        print(f"Failed to request review for {top_owners}: {e}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)