
Fix pr 32013 #2

Closed
wants to merge 350 commits into from
350 commits
165116b
Remove conversational pipeline tests (#32099)
amyeroberts Jul 24, 2024
e0182f3
RoPE: relaxed rope validation (#32182)
gante Jul 24, 2024
8d2534c
let's not warn when someone is running a forward (#32176)
ArthurZucker Jul 24, 2024
1392a68
Fix resize embedding with Deepspeed (#32192)
zucchini-nlp Jul 24, 2024
af0e4b7
Fix float8_e4m3fn in modeling_utils (#32193)
SunMarc Jul 24, 2024
1c122a4
Support dequantizing GGUF FP16 format (#31783)
PenutChen Jul 24, 2024
edd68f4
🚨 No more default chat templates (#31733)
Rocketknight1 Jul 24, 2024
85a1269
fix: Replaced deprecated `unittest method` with the correct one (#32198)
Sai-Suraj-27 Jul 24, 2024
5658e74
[whisper] fix short-form output type (#32178)
sanchit-gandhi Jul 25, 2024
f53a5de
remove unnecessary guard code related with pytorch versions 1.4.2 ~ 1…
statelesshz Jul 25, 2024
1ecedf1
Update question_answering.py (#32208)
avlewis Jul 25, 2024
9b9a54e
[BigBird Pegasus] set _supports_param_buffer_assignment to False (#32…
kashif Jul 25, 2024
de23188
[warnings] fix E721 warnings (#32223)
kashif Jul 25, 2024
df6eee9
Follow up for #31973 (#32025)
ydshieh Jul 25, 2024
6ed0bf1
translate philosophy.md to chinese (#32177)
statelesshz Jul 25, 2024
3a83ec4
Allow a specific microphone to be used by the ffmpeg audio pipeline u…
jrhe Jul 25, 2024
9d6c064
Fix code snippet for Grounding DINO (#32229)
qubvel Jul 25, 2024
4ab33c2
Generation: stop at `eos` for assisted decoding (#31301)
zucchini-nlp Jul 26, 2024
fad15fb
Llava: generate without images (#32183)
zucchini-nlp Jul 26, 2024
c46edfb
Resize embeds with DeepSpeed (#32214)
zucchini-nlp Jul 26, 2024
1c7ebf1
don't log base model architecture in wandb if log model is false (#32…
joaonadkarni Jul 26, 2024
b8e5cd5
Refactor: Removed un-necessary `object` base class (#32230)
Sai-Suraj-27 Jul 26, 2024
f9756d9
Adds: extra_repr for RMSNorm layers in most models (#32204)
rohitdwivedula Jul 26, 2024
5f841c7
Add check for `target_sizes is None` in `post_process_image_guided_de…
catalys1 Jul 26, 2024
27c7f97
[tests] fix `static` cache implementation is not compatible with `att…
faaany Jul 26, 2024
81233c0
Flash-Attn: fix generation when no attention mask or no pading (#32241)
zucchini-nlp Jul 26, 2024
8da9068
More flexible trigger condition (#32251)
ydshieh Jul 26, 2024
44f6fdd
Llama 3.1: replace for loop by tensor ops at inv_freq initialization …
gante Jul 27, 2024
f739687
🚨 Bloom support for cache class (#31445)
zucchini-nlp Jul 29, 2024
f2122cc
Upload new model failure report to Hub (#32264)
ydshieh Jul 29, 2024
5019aab
Optimize t5 tokenize logic to avoid redundant calls (#32270)
leejet Jul 29, 2024
a2ad9d5
fix: Fixed wrong argument passed to `convert_blip_checkpoint` functio…
Sai-Suraj-27 Jul 29, 2024
535fe78
Repo: remove exceptions in `check_docstrings` (#32259)
gante Jul 29, 2024
6494479
make `p_mask` a numpy array before passing to `select_starts_ends` (#…
faaany Jul 29, 2024
4992889
fix(docs): Fixed a link in docs (#32274)
Sai-Suraj-27 Jul 29, 2024
7ffe25f
Generate: end-to-end compilation (#30788)
gante Jul 29, 2024
3fbaaaa
Whisper tokenizer word level timestamps (#32197)
kamilakesbi Jul 29, 2024
7f5d644
[pipeline] fix padding for 1-d tensors (#31776)
sanchit-gandhi Jul 29, 2024
811a9ca
Make static cache compatible with torch.export (#32168)
guangy10 Jul 29, 2024
a24a9a6
Add stream messages from agent run for gradio chatbot (#32142)
aymeric-roucher Jul 29, 2024
f0bc49e
use torch 2.4 in 2 CI jobs (#32302)
ydshieh Jul 29, 2024
3e8106d
Docs: fix GaLore optimizer code example (#32249)
gil2rok Jul 30, 2024
934fe15
Fix GGUF dequantize for `gguf==0.9.1` (#32298)
Isotr0py Jul 30, 2024
20528f0
Cast epochs_trained to int when resuming training (#32286)
teddy-f-47 Jul 30, 2024
084b509
feat(ci): set `fetch-depth: 0` in trufflehog checkout step (#31663)
McPatate Jul 30, 2024
2fbbcf5
Fix M4T for ASR pipeline (#32296)
ylacombe Jul 30, 2024
e68ec18
Docs: formatting nits (#32247)
gante Jul 30, 2024
bd54ed2
Alternative agent plan (#32295)
plaggy Jul 30, 2024
1627108
fix: Added missing raise keyword for few exceptions (#32333)
Sai-Suraj-27 Jul 30, 2024
62c60a3
fixes to properly shard FSDP across cpu and meta for cpu_efficient_lo…
winglian Jul 30, 2024
516af4b
fixes #32329 : The Torch code is correct - to get an average of 10% o…
fkrasnov2 Jul 30, 2024
026a173
Repo checks: skip docstring checks if not in the diff (#32328)
gante Jul 30, 2024
6e2d04e
Fix slow GemmaTokenizer and improve SPM slow -> fast conversion proce…
xenova Jul 30, 2024
a326433
LLaVA-NeXT: fix anyres shapes (#32314)
zucchini-nlp Jul 31, 2024
7f552e2
Gemma2 and flash-attention (#32188)
zucchini-nlp Jul 31, 2024
b75ad56
Llama 3.1: Fix incorrect `inv_freq` assignment (#32330)
gante Jul 31, 2024
5f1fcc2
[Idefics2] - Fix FA2 call for Perceiver layer (#32275)
amyeroberts Jul 31, 2024
ef177a5
Gemma 2: support assisted generation (#32357)
gante Jul 31, 2024
b46bd8b
Fix error when streaming to gradio with non-string tool arguments (#3…
aymeric-roucher Jul 31, 2024
92abe60
>3-5x faster torch.compile forward compilation for autoregressive dec…
fxmarty Jul 31, 2024
53f0c9c
fix: Removed unnecessary `@staticmethod` decorator (#32361)
Sai-Suraj-27 Jul 31, 2024
14ee232
fix: warmup_steps check for training_args (#32236)
Ricardo-L-C Jul 31, 2024
453e748
LLaVa: add cache class attribute (#32278)
zucchini-nlp Aug 1, 2024
9451a38
[enc-dec cache] fix bug in indexing (#32370)
sanchit-gandhi Aug 1, 2024
e234061
[whisper] compile compatibility with long-form decoding (#31772)
sanchit-gandhi Aug 1, 2024
48ed24c
Remove size check between attn_weights and kv_seq_len for phi3 (#32339)
helunwencser Aug 1, 2024
9e28284
add missing attribute _supports_param_buffer_assignment for gpt-j. (#…
nv-guomingz Aug 1, 2024
05c1f9a
Check device map for saving tokenizer config on TPU (fix for issue #3…
ayukh Aug 1, 2024
2229ebe
update clean_up_tokenization_spaces warning (#32371)
itazap Aug 1, 2024
db8c7ca
Empty list in defaults for LLaMA special tokens during weights conver…
ViktorooReps Aug 1, 2024
b4727a1
Fix conflicting key in init kwargs in PreTrainedTokenizerBase (#31233)
OmarManzoor Aug 1, 2024
ca59d6f
Offloaded KV Cache (#31325)
n17s Aug 1, 2024
e3d8285
Docker: add `speech` dep to the consistency docker image (#32374)
gante Aug 1, 2024
51ab25e
Fixed Hybrid Cache Shape Initialization. (#32163)
OsamaS99 Aug 1, 2024
82efc53
Yell at the user if zero-3 init wasn't performed, but expected to hav…
muellerzr Aug 1, 2024
2af199c
Update docs (#32368)
zucchini-nlp Aug 2, 2024
083e13b
RoPE: Add numerical tests ✨ (#32380)
gante Aug 2, 2024
c1aa0ed
[generate] only require an attention mask for mps with torch<2.4 (#32…
sanchit-gandhi Aug 2, 2024
7c31d05
fix: (issue #32124) Exception raised when running `transformers/examp…
fshp971 Aug 3, 2024
621fb3c
MixtralFlashAttention2: put "plus 1" inside parentheses when calculat…
xenshinu Aug 3, 2024
847bb85
Bump keras from 2.8.0 to 2.13.1 in /examples/research_projects/decisi…
dependabot[bot] Aug 5, 2024
05ae3a3
fix: SeamlessM4TFeatureExtractor stride remainder (#32088)
TechInterMezzo Aug 5, 2024
3bb646a
Phi3 tests: fix typing for Python 3.8 (#32388)
zucchini-nlp Aug 5, 2024
3d7c2f9
#32184 save total_vocab_size (#32240)
itazap Aug 5, 2024
ea5da52
add values for neftune (#32399)
nbroad1881 Aug 5, 2024
f5f1e52
Fix documentation references to google/bit-50 model (#32407)
JuanFKurucz Aug 5, 2024
baf7e5c
Persist embedding type of BART and mBART models after resize (#32242)
AbdiHaryadi Aug 5, 2024
458b0cd
fix: Updated `test_embeded_special_tokens` for luke and mluke models …
Sai-Suraj-27 Aug 5, 2024
7e5d46d
Respect the config's attn_implementation if set (#32383)
amyeroberts Aug 5, 2024
13dc6b0
Fix documentation links and code reference to model llava-next (#32434)
JuanFKurucz Aug 5, 2024
37c5ca5
Cache: create docs (#32150)
zucchini-nlp Aug 6, 2024
0aa8328
Llava: fix checkpoint_doc (#32458)
RUFFY-369 Aug 6, 2024
e85d863
add the missing flash attention test marker (#32419)
faaany Aug 6, 2024
fb66ef8
Update kwargs validation for `preprocess` with decorator (#32024)
qubvel Aug 6, 2024
438d06c
Fix get large model config for Switch Transformer encoder only tester…
JuanFKurucz Aug 6, 2024
36fd35e
Dependencies: fix typo (#32389)
gante Aug 6, 2024
6a03942
Add Nemotron HF Support (#31699)
suiyoubi Aug 6, 2024
3d8bd11
Generate: fix end to end compilation (#32465)
gante Aug 6, 2024
80b90e7
Add codestral mamba2 (#32080)
molbap Aug 6, 2024
194cf1f
Migrate import checks not need accelerate, and be more clear on min v…
muellerzr Aug 6, 2024
50c3ba8
Documentation: BOS token_id deprecation change for NLLB (#32443)
christoukmaji Aug 6, 2024
26a9443
dev version 4.45.0
ArthurZucker Aug 6, 2024
4fdc702
`is_torchdynamo_compiling` -- cast a wide exception net (#32476)
gante Aug 6, 2024
ac2707e
Revert "fixes to properly shard FSDP across cpu and meta for cpu_effc…
matthewdouglas Aug 6, 2024
5301b98
🌐 [i18n-KO] Translated `mask_generation.md` to Korean (#32257)
jeongiin Aug 6, 2024
3b193c7
🌐 [i18n-KO] Translated `idefics.md` to Korean (#32258)
boyunJang Aug 6, 2024
6af0854
🌐 [i18n-KO] Translated `image_to_image.md` to Korean (#32327)
shinhyunji36 Aug 6, 2024
a30c865
Cache: new Cache format in decoder-only models (#31421)
zucchini-nlp Aug 7, 2024
7ad784a
Gemma2: add cache warning (#32279)
zucchini-nlp Aug 7, 2024
46d09af
enable xla fsdp (#32048)
hanwen-sun Aug 7, 2024
c54a6f9
Fix typo in tokenization_utils_base.py (#32484)
blubitz Aug 7, 2024
e0d8253
Agents use grammar (#31735)
aymeric-roucher Aug 7, 2024
b640103
fix broken link in docs (#32491)
jorahn Aug 7, 2024
b7fb393
Docs: alert for the possibility of manipulating logits (#32467)
gante Aug 7, 2024
1124d95
🌐 [i18n-KO] Translated `gptq.md` to Korean (#32293)
1kmmk1 Aug 7, 2024
fcc4f2a
🌐 [i18n-KO] Translated `prompting.md` to Korean (#32294)
chhaewxn Aug 7, 2024
fa59fd8
🌐 [i18n-KO] Translated `quantization/quanto.md` to Korean (#32281)
fabxoe Aug 7, 2024
cba7bcf
🌐 [i18n-KO] Translated `image_feature_extraction.md` to Korean (#32239)
mreraser Aug 7, 2024
73a59a2
Fix references to model google mt5 small (#32497)
JuanFKurucz Aug 7, 2024
543df48
Docs: Fixed WhisperModel.forward’s docstring link (#32498)
Sai-Suraj-27 Aug 7, 2024
78566db
🌐 [i18n-KO] Translated `chat_templating.md` to Korean (#32362)
enchantee00 Aug 7, 2024
f5cdbf6
Fix link to autoclass_tutorial.md in i18n.md (#32501)
JuanFKurucz Aug 7, 2024
aefd3e2
Fix typo: depracted -> deprecated (#32489)
tomaarsen Aug 8, 2024
1c944ac
Fix issue #32518: Update llm_tutorial.md (#32523)
doomdagadiggiedahdah Aug 8, 2024
e28784f
Change Phi3 `_supports_sdpa` to True (#32457)
pocca2048 Aug 8, 2024
d3b3551
Uniformize kwargs for processors - GroundingDINO (#31964)
SangbumChoi Aug 8, 2024
b51d414
Fix add-new-model-like (#31773)
molbap Aug 8, 2024
16ed064
Add Qwen2-Audio (#32137)
faychu Aug 8, 2024
cc832cb
filter flash_attn optional imports loading remote code (#30954)
eaidova Aug 8, 2024
43f3fe8
🌐 [i18n-KO] Translated `ko-llm_tutorial_optimization.md` to Korean (#…
010kim Aug 8, 2024
96ba7f0
🌐 [i18n-KO] Translated `trainer.md` to Korean (#32260)
cjfghk5697 Aug 8, 2024
e0396bd
🌐 [i18n-KO] Translated `eetq.md` to Korean (#32352)
jun048098 Aug 8, 2024
496207a
🌐 [i18n-KO] Translated `fsdp.md` to Korean (#32261)
win2dvp21 Aug 8, 2024
b01f9c4
🌐 [i18n-KO] Translated `bitsandbytes.md` to Korean (#32408)
SeungAhSon Aug 8, 2024
0442816
Fix generate with `inputs_embeds` as input (#32493)
molbap Aug 8, 2024
0164560
Fixed test `test_static_cache_exportability` with torch 2.4.0 (#32516)
guangy10 Aug 8, 2024
54ac39c
Fix code example to load bigcode starcoder2 7b (#32474)
JuanFKurucz Aug 8, 2024
85817d9
[docs] Translation guide (#32547)
stevhliu Aug 8, 2024
838d141
Gemma2: fix FA2 generation (#32553)
zucchini-nlp Aug 9, 2024
7728b78
Fix a bug in Qwen2Audio (#32552)
faychu Aug 9, 2024
e4522fe
fix slow integration gemma2 test (#32534)
ArthurZucker Aug 9, 2024
e7f4ace
fix non contiguous tensor value error in save_pretrained (#32422)
congcongke Aug 9, 2024
48101cf
🌐 [i18n-KO] Translated `agent.md` to Korean (#32351)
Jwaminju Aug 9, 2024
7c11491
Add new model (#32615)
younesbelkada Aug 12, 2024
8f2b6d5
Fix: FA2 with packed training (#32487)
zucchini-nlp Aug 12, 2024
342e3f9
Fix sliding window attention used in Gemma2FlashAttention2 (#32522)
brcps12 Aug 12, 2024
bd251e4
fix: Fixed conditional check for `encodec` model names (#32581)
Sai-Suraj-27 Aug 12, 2024
e31a7a2
Fix `.push_to_hub(..., create_pr=True, revision="my-branch")` when cr…
Wauplin Aug 12, 2024
50837f2
Bump aiohttp from 3.9.4 to 3.10.2 in /examples/research_projects/deci…
dependabot[bot] Aug 12, 2024
8a3c55e
Bump torch from 1.13.1 to 2.2.0 in /examples/research_projects/visual…
dependabot[bot] Aug 12, 2024
b7ea171
Cleanup tool calling documentation and rename doc (#32337)
Rocketknight1 Aug 12, 2024
4996990
🌐 [i18n-KO] Translated `deepspeed.md` to Korean (#32431)
4N3MONE Aug 12, 2024
7f777ab
🌐 [i18n-KO] Translated `awq.md`to Korean (#32324)
ahnjj Aug 12, 2024
ce4b288
fix: Fixed failing `test_find_base_model_checkpoint` (#32638)
Sai-Suraj-27 Aug 12, 2024
126cbdb
Bump tensorflow from 2.11.1 to 2.12.1 in /examples/research_projects/…
dependabot[bot] Aug 12, 2024
f1c8542
"to be not" -> "not to be" (#32636)
qgallouedec Aug 12, 2024
2a5a6ad
fix: Updated the `is_torch_mps_available()` function to include `min_…
Sai-Suraj-27 Aug 12, 2024
a29eabd
Expand inputs in processors for VLMs (#30962)
zucchini-nlp Aug 13, 2024
29c3a0f
Automatically add `transformers` tag to the modelcard (#32623)
LysandreJik Aug 13, 2024
a5a8291
Fix tests (#32649)
molbap Aug 13, 2024
b5016d5
fix tensors on different devices in `WhisperGenerationMixin` (#32316)
faaany Aug 13, 2024
481e156
Add support for GrokAdamW optimizer (#32521)
ehartford Aug 13, 2024
cc25757
Add Depth Anything V2 Metric models (#32126)
bt2513 Aug 13, 2024
c3cd9d8
Fix: Fixed directory path for utils folder in `test_tokenization_util…
Sai-Suraj-27 Aug 13, 2024
5bcbdff
Modify ProcessorTesterMixin for better generalization (#32637)
yonigozlan Aug 13, 2024
9d2ab88
TF_Deberta supporting mixed precision (#32618)
pinesnow72 Aug 13, 2024
c135783
Fix tests recurrent (#32651)
molbap Aug 13, 2024
a22ff36
Support MUSA (Moore Threads GPU) backend in transformers (#31913)
fmo-mt Aug 14, 2024
df32347
fix: Fixed failing tests in `tests/utils/test_add_new_model_like.py` …
Sai-Suraj-27 Aug 14, 2024
9485289
Update translation docs review (#32662)
stevhliu Aug 14, 2024
78d78cd
Add TorchAOHfQuantizer (#32306)
jerryzh168 Aug 14, 2024
20a0449
Fix `JetMoeIntegrationTest` (#32332)
ydshieh Aug 14, 2024
6577c77
Update the distributed CPU training on Kubernetes documentation (#32669)
dmsuehir Aug 14, 2024
95a7781
fix: Fixed unknown pytest config option `doctest_glob` (#32475)
Sai-Suraj-27 Aug 14, 2024
0cea208
Unpin deepspeed in Docker image/tests (#32572)
muellerzr Aug 14, 2024
8820fe8
Updated workflows to the latest versions (#32405)
Sai-Suraj-27 Aug 14, 2024
e840127
reopen: llava-next fails to consider padding_side during Training (#3…
jp1924 Aug 15, 2024
ab7e893
fix: Corrected ` falcon-mamba-7b` model checkpoint name (#32837)
Sai-Suraj-27 Aug 15, 2024
d6751d9
fix: update doc link for runhouse in README.md (#32664)
muddlebee Aug 15, 2024
f3c8b18
VLMs: small clean-up for cache class (#32417)
zucchini-nlp Aug 16, 2024
c215523
add back the position ids (#32554)
ArthurZucker Aug 16, 2024
5fd7ca7
Use head_dim if in config for RoPE (#32495)
suiyoubi Aug 16, 2024
70d5df6
Generate: unify `LogitsWarper` and `LogitsProcessor` (#32626)
gante Aug 16, 2024
8f9fa3b
[tests] make test_sdpa_equivalence device-agnostic (#32520)
faaany Aug 16, 2024
cf32ee1
Cache: use `batch_size` instead of `max_batch_size` (#32657)
gante Aug 16, 2024
a27182b
Fix AutoConfig and AutoModel support for Llava-Next-Video (#32844)
TKONIY Aug 16, 2024
f20d0e8
improve _get_is_as_tensor_fns (#32596)
zrr1999 Aug 16, 2024
0b066be
Revert PR 32299, flag users when Zero-3 was missed (#32851)
muellerzr Aug 16, 2024
1c36db6
fix multi-gpu with static cache (#32543)
SunMarc Aug 16, 2024
8ec028a
Reduce the error log when using core models that need their weights r…
muellerzr Aug 16, 2024
6806d33
Make beam_constraints.Constraint.advance() docstring more accurate (#…
alex-calderwood Aug 16, 2024
52cb403
generate: missing `to` in DoLa body, causing exceptions in multi-gpu …
gante Aug 17, 2024
843e5e2
Add Flax Dinov2 (#31960)
MHRDYN7 Aug 19, 2024
8260cb3
Add Descript-Audio-Codec model (#31494)
kamilakesbi Aug 19, 2024
54b7703
support torch-speech (#32537)
itazap Aug 19, 2024
e55b33c
[tests] make `test_sdpa_can_compile_dynamic` device-agnostic (#32519)
faaany Aug 19, 2024
f1b720e
Add __repr__ for Conv1D (#32425)
AaronZLT Aug 19, 2024
8a4857c
Support save/load ckpt for XLA FSDP (#32311)
yitongh Aug 19, 2024
5f6c080
RT-DETR parameterized batchnorm freezing (#32631)
AlanBlanchet Aug 19, 2024
59e8f19
Fix incorrect vocab size retrieval in GGUF config (#32551)
Isotr0py Aug 19, 2024
93e538a
Mamba / FalconMamba: Fix mamba left padding (#32677)
younesbelkada Aug 19, 2024
61d89c1
Fix: Mamba2 generation mismatch between input_ids and inputs_embeds (…
vasqu Aug 19, 2024
3720484
Docs: Fixed `whisper-large-v2` model link in docs (#32871)
Sai-Suraj-27 Aug 19, 2024
85345bb
Add tip to clarify tool calling (#32883)
Rocketknight1 Aug 19, 2024
13e645b
Allow-head-dim (#32857)
ArthurZucker Aug 20, 2024
fd06ad5
🚨🚨🚨 Update min version of accelerate to 0.26.0 (#32627)
SunMarc Aug 20, 2024
65f4bc9
Fix repr for conv (#32897)
ArthurZucker Aug 20, 2024
01c4fc4
fix: jamba cache fails to use torch.nn.module (#32894)
xgal Aug 20, 2024
c63a3d0
Fix: Mamba2 `norm_before_gate` usage (#32686)
vasqu Aug 20, 2024
9800e6d
Bump nltk from 3.7 to 3.9 in /examples/research_projects/decision_tra…
dependabot[bot] Aug 20, 2024
078d5a8
Replace `tensor.norm()` with decomposed version for CLIP executorch e…
qubvel Aug 20, 2024
1dde50c
link for optimizer names (#32400)
nbroad1881 Aug 20, 2024
8713466
[i18n-ar] add README_ar.md to README.md (#32583)
AhmedAlmaghz Aug 20, 2024
c6d484e
fix: [whisper] don't overwrite GenerationConfig's `return_timestamps`…
hrl Aug 21, 2024
3bb7b05
Update docker image building (#32918)
ArthurZucker Aug 21, 2024
f6e2586
Jamba: update integration tests (#32250)
gante Aug 22, 2024
af638c4
fix: Added missing `huggingface_hub` installation to workflows (#32891)
Sai-Suraj-27 Aug 22, 2024
6baa6f2
fix: no need to dtype A in jamba (#32924)
xgal Aug 22, 2024
c42d264
FEAT / Trainer: Add adamw 4bit optimizer (#31865)
SunMarc Aug 22, 2024
8b94d28
CI: separate step to download nltk files (#32935)
gante Aug 22, 2024
eeea712
FIX / Hub: Also catch for `exceptions.ConnectionError` (#31469)
younesbelkada Aug 22, 2024
9282413
Add SynCode to llm_tutorial (#32884)
shubhamugare Aug 22, 2024
bf97d4a
Fix benchmark script (#32635)
ydshieh Aug 22, 2024
99d67f1
Improve greedy search memory usage (#32895)
regisss Aug 22, 2024
ee8c01f
Add chat_template for tokenizer extracted from GGUF model (#32908)
Isotr0py Aug 22, 2024
f1d822b
fix: (issue #32689) `AttributeError` raised when using `Trainer` with…
fshp971 Aug 22, 2024
975b988
Gemma2: eager attention by default (#32865)
gante Aug 22, 2024
18199b3
[run_slow] idefics2 (#32840)
andimarafioti Aug 22, 2024
273c0af
Fix regression on `Processor.save_pretrained` caused by #31691 (#32921)
leloykun Aug 22, 2024
09e6579
🌐 [i18n-KO] Translated `knowledge_distillation_for_image_classificati…
JinukHong Aug 22, 2024
a26de15
Generate: Deprecate returning legacy cache by default; Handle `use_ca…
gante Aug 22, 2024
d806fa3
docs: fix outdated link to TF32 explanation (#32947)
anakin87 Aug 22, 2024
22e6f14
Reducing memory usage: removing useless logits computation in generat…
Cyrilvallez Aug 23, 2024
970a16e
Forbid `PretrainedConfig` from saving `generate` parameters; Update d…
gante Aug 23, 2024
adb9117
Integrate Liger (Linkedin GPU Efficient Runtime) Kernel to Trainer (#…
JasonZhu1313 Aug 23, 2024
371b9c1
Enable some Jinja extensions and add datetime capabilities (#32684)
Rocketknight1 Aug 23, 2024
1dbd9d3
DeviceGuard added to use Deformable Attention more safely on multi-GP…
DonggeunYu Aug 23, 2024
e3a5f35
added doctring to SchedulerType class (#32898)
Arunprakash-A Aug 23, 2024
0a7af19
Update Jinja docs with new functions and general cleanup (#33097)
Rocketknight1 Aug 23, 2024
2cdc473
uniformize kwargs of Chameleon
leloykun Aug 16, 2024
43febe0
fix linter nit
leloykun Aug 16, 2024
f59ca5a
rm stride default
leloykun Aug 16, 2024
dcbfd17
add tests for chameleon processor
leloykun Aug 16, 2024
80fb7bb
fix tests
leloykun Aug 16, 2024
e57e988
fix chameleon tests
leloykun Aug 16, 2024
ed0e8aa
don't hardcode arg names
leloykun Aug 16, 2024
ae5d537
add comment on get_component
leloykun Aug 19, 2024
b252643
rm Chameleon's slow tokenizer
leloykun Aug 20, 2024
dae439c
add support for image generation and interleaved image-text generatio…
leloykun Aug 19, 2024
7607e4c
Fix issues in PR #32013
Sep 3, 2024
3 changes: 2 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -142,6 +142,7 @@ jobs:
- run: python utils/custom_init_isort.py --check_only
- run: python utils/sort_auto_mappings.py --check_only
- run: python utils/check_doc_toc.py
- run: python utils/check_docstrings.py --check_all

check_repository_consistency:
working_directory: ~/transformers
@@ -190,4 +191,4 @@ workflows:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_all_tests
- fetch_all_tests
11 changes: 6 additions & 5 deletions .circleci/create_circleci_config.py
@@ -121,11 +121,16 @@ def to_dict(self):
)

steps.append({"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}})

# Examples special case: we need to download NLTK files in advance to avoid concurrency issues
if "examples" in self.name:
steps.append({"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """}})

test_command = ""
if self.command_timeout:
test_command = f"timeout {self.command_timeout} "
# junit family xunit1 is necessary to support splitting on test name or class name with circleci split
test_command += f"python3 -m pytest -rsfE -p no:warnings -o junit_family=xunit1 --tb=short --junitxml=test-results/junit.xml -n {self.pytest_num_workers} " + " ".join(pytest_flags)
test_command += f"python3 -m pytest -rsfE -p no:warnings --tb=short -o junit_family=xunit1 --junitxml=test-results/junit.xml -n {self.pytest_num_workers} " + " ".join(pytest_flags)

if self.parallelism == 1:
if self.tests_to_run is None:
@@ -185,10 +190,6 @@ def to_dict(self):
steps.append({"store_artifacts": {"path": "tests.txt"}})
steps.append({"store_artifacts": {"path": "splitted_tests.txt"}})

test_command = ""
if self.command_timeout:
test_command = f"timeout {self.command_timeout} "
test_command += f"python3 -m pytest -rsfE -p no:warnings --tb=short -o junit_family=xunit1 --junitxml=test-results/junit.xml -n {self.pytest_num_workers} " + " ".join(pytest_flags)
test_command += " $(cat splitted_tests.txt)"
if self.marker is not None:
test_command += f" -m {self.marker}"
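The hunk above reorders the pytest flags so `-o junit_family=xunit1` follows `--tb=short` in the assembled command string. A minimal sketch of how such a command is built up, following the pattern in the diff (the function name and parameters here are illustrative, not the actual `create_circleci_config.py` API):

```python
def build_test_command(command_timeout, pytest_num_workers, pytest_flags, marker=None):
    """Assemble a pytest invocation string following the pattern in the hunk above."""
    test_command = ""
    if command_timeout:
        # A hard timeout prevents a hung test from stalling the whole CI job.
        test_command = f"timeout {command_timeout} "
    # junit_family=xunit1 is needed so CircleCI can split tests by name or class.
    test_command += (
        "python3 -m pytest -rsfE -p no:warnings --tb=short "
        "-o junit_family=xunit1 --junitxml=test-results/junit.xml "
        f"-n {pytest_num_workers} " + " ".join(pytest_flags)
    )
    test_command += " $(cat splitted_tests.txt)"
    if marker is not None:
        test_command += f" -m {marker}"
    return test_command

cmd = build_test_command(120, 8, ["--durations=10"], marker="slow")
print(cmd)
```

Keeping the assembly in one place means the parallelism and marker branches shown later in the diff only append to a single base string.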
17 changes: 14 additions & 3 deletions .github/ISSUE_TEMPLATE/bug-report.yml
@@ -1,6 +1,17 @@
name: "\U0001F41B Bug Report"
description: Submit a bug report to help us improve transformers
labels: [ "bug" ]
body:
- type: markdown
attributes:
value: |
Thanks for taking the time to fill out this bug report! 🤗

Before you submit your bug report:

- If it is your first time submitting, be sure to check our [bug report guidelines](https://github.com/huggingface/transformers/blob/main/CONTRIBUTING.md#did-you-find-a-bug)
- Try our [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat) -- it might be able to help you with your issue

- type: textarea
id: system-info
attributes:
@@ -25,7 +36,7 @@ body:

Models:

- text models: @ArthurZucker
- text models: @ArthurZucker
- vision models: @amyeroberts
- speech models: @sanchit-gandhi
- graph models: @clefourrier
@@ -38,9 +49,9 @@ body:
- tensorflow: @gante and @Rocketknight1
- tokenizers: @ArthurZucker
- trainer: @muellerzr @SunMarc

Integrations:

- deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/i18n.md
@@ -34,7 +34,7 @@ Some notes:

## Tutorial section
- [ ] [pipeline_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.md)
- [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/master/docs/source/autoclass_tutorial.md)
- [ ] [autoclass_tutorial.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/autoclass_tutorial.md)
- [ ] [preprocessing.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/preprocessing.md)
- [ ] [training.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/training.md)
- [ ] [accelerate.md](https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerate.md)
4 changes: 2 additions & 2 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -58,9 +58,9 @@ Integrations:
- deepspeed: HF Trainer/Accelerate: @muellerzr
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc
- quantization (bitsandbytes, autogpt): @SunMarc

Documentation: @stevhliu and @MKhalusova
Documentation: @stevhliu

HF projects:

2 changes: 1 addition & 1 deletion .github/workflows/add-model-like.yml
@@ -23,7 +23,7 @@ jobs:
sudo apt -y update && sudo apt install -y libsndfile1-dev

- name: Load cached virtual environment
uses: actions/cache@v2
uses: actions/cache@v4
id: cache
with:
path: ~/venv/
4 changes: 2 additions & 2 deletions .github/workflows/benchmark.yml
@@ -31,12 +31,12 @@ jobs:
if: github.event_name == 'schedule'
working-directory: /transformers
run: |
python3 -m pip install optimum-benchmark>=0.2.0
python3 -m pip install optimum-benchmark>=0.3.0
HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun

- name: Benchmark (merged to main event)
if: github.event_name == 'push' && github.ref_name == 'main'
working-directory: /transformers
run: |
python3 -m pip install optimum-benchmark>=0.2.0
python3 -m pip install optimum-benchmark>=0.3.0
HF_TOKEN=${{ secrets.TRANSFORMERS_BENCHMARK_TOKEN }} python3 benchmark/benchmark.py --repo_id hf-internal-testing/benchmark_results_merge_event --path_in_repo $(date +'%Y-%m-%d') --config-dir benchmark/config --config-name generation --commit=${{ github.sha }} backend.model=google/gemma-2b backend.cache_implementation=null,static backend.torch_compile=false,true --multirun
19 changes: 16 additions & 3 deletions .github/workflows/build-ci-docker-images.yml
@@ -27,10 +27,10 @@ jobs:
strategy:
matrix:
file: ["quality", "consistency", "custom-tokenizers", "torch-light", "tf-light", "exotic-models", "torch-tf-light", "torch-jax-light", "jax-light", "examples-torch", "examples-tf"]
continue-on-error: true
continue-on-error: true

steps:
-
-
name: Set tag
run: |
if ${{contains(github.event.head_commit.message, '[build-ci-image]')}}; then
@@ -61,4 +61,17 @@ jobs:
REF=${{ github.sha }}
file: "./docker/${{ matrix.file }}.dockerfile"
push: ${{ contains(github.event.head_commit.message, 'ci-image]') || github.event_name == 'schedule' }}
tags: ${{ env.TAG }}
tags: ${{ env.TAG }}

notify:
runs-on: ubuntu-22.04
if: ${{ contains(github.event.head_commit.message, '[build-ci-image]') || contains(github.event.head_commit.message, '[push-ci-image]') && '!cancelled()' || github.event_name == 'schedule' }}
steps:
- name: Post to Slack
if: ${{ contains(github.event.head_commit.message, '[push-ci-image]') && github.event_name != 'schedule' }}
uses: huggingface/hf-workflows/.github/actions/post-slack@main
with:
slack_channel: "#transformers-ci-circleci-images"
title: 🤗 New docker images for CircleCI are pushed.
status: ${{ job.status }}
slack_token: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
2 changes: 1 addition & 1 deletion .github/workflows/check_tiny_models.yml
@@ -23,7 +23,7 @@ jobs:

- uses: actions/checkout@v4
- name: Set up Python 3.8
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
# Semantic version range syntax or exact version of a Python version
python-version: '3.8'
2 changes: 1 addition & 1 deletion .github/workflows/release-conda.yml
@@ -19,7 +19,7 @@ jobs:

steps:
- name: Checkout repository
uses: actions/checkout@v1
uses: actions/checkout@v4

- name: Install miniconda
uses: conda-incubator/setup-miniconda@v2
2 changes: 1 addition & 1 deletion .github/workflows/self-pr-slow-ci.yml
@@ -4,7 +4,7 @@ on:
pull_request:
paths:
- "src/transformers/models/*/modeling_*.py"
- "tests/models/*/test_*.py"
- "tests/**/test_*.py"

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
1 change: 1 addition & 0 deletions .github/workflows/self-push-amd.yml
@@ -324,6 +324,7 @@ jobs:
# We pass `needs.setup_gpu.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
pip install huggingface_hub
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"
3 changes: 2 additions & 1 deletion .github/workflows/self-push.yml
@@ -563,6 +563,7 @@ jobs:
# We pass `needs.setup.outputs.matrix` as the argument. A processing in `notification_service.py` to change
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
pip install slack_sdk
pip install huggingface_hub
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
1 change: 1 addition & 0 deletions .github/workflows/self-scheduled-amd.yml
@@ -506,6 +506,7 @@ jobs:
# `models/bert` to `models_bert` is required, as the artifact names use `_` instead of `/`.
run: |
sudo apt-get install -y curl
pip install huggingface_hub
pip install slack_sdk
pip show slack_sdk
python utils/notification_service.py "${{ needs.setup.outputs.matrix }}"
2 changes: 1 addition & 1 deletion .github/workflows/stale.yml
@@ -15,7 +15,7 @@ jobs:
- uses: actions/checkout@v4

- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.8

23 changes: 6 additions & 17 deletions .github/workflows/trufflehog.yml
@@ -10,20 +10,9 @@ jobs:
trufflehog:
runs-on: ubuntu-latest
steps:
- shell: bash
run: |
if [ "${{ github.event_name }}" == "push" ]; then
echo "depth=$(($(jq length <<< '${{ toJson(github.event.commits) }}') + 2))" >> $GITHUB_ENV
echo "branch=${{ github.ref_name }}" >> $GITHUB_ENV
fi
if [ "${{ github.event_name }}" == "pull_request" ]; then
echo "depth=$((${{ github.event.pull_request.commits }}+2))" >> $GITHUB_ENV
echo "branch=${{ github.event.pull_request.head.ref }}" >> $GITHUB_ENV
fi
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{env.branch}}
fetch-depth: ${{env.depth}}
- name: Secret Scanning
uses: trufflesecurity/trufflehog@main
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Secret Scanning
uses: trufflesecurity/trufflehog@main
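The simplification above drops the step that derived a shallow-clone depth from the event payload and instead checks out the full history with `fetch-depth: 0`, so TruffleHog can always scan every commit. For reference, a minimal sketch of the arithmetic the removed step performed (function name is illustrative, not part of the workflow):

```python
# Sketch of the removed step's depth computation: it exported
# depth = (number of commits in the triggering event) + 2 for
# actions/checkout, instead of fetching the full history as the
# new `fetch-depth: 0` checkout does.
def shallow_depth(event_commits):
    """Clone depth the old workflow would have used for this event."""
    return len(event_commits) + 2

# A push event carrying three commits would have cloned with depth 5.
print(shallow_depth([{"id": "a1"}, {"id": "b2"}, {"id": "c3"}]))  # 5
```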
15 changes: 9 additions & 6 deletions CONTRIBUTING.md
@@ -61,7 +61,10 @@ feedback.
The 🤗 Transformers library is robust and reliable thanks to users who report the problems they encounter.

Before you report an issue, we would really appreciate it if you could **make sure the bug was not
already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the [forum](https://discuss.huggingface.co/) first. This helps us respond quicker to fixing issues related to the library versus general questions.
already reported** (use the search bar on GitHub under Issues). Your issue should also be related to bugs in the library itself, and not your code. If you're unsure whether the bug is in your code or the library, please ask in the [forum](https://discuss.huggingface.co/) or on our [discord](https://discord.com/invite/hugging-face-879548962464493619) first. This helps us respond quicker to fixing issues related to the library versus general questions.

> [!TIP]
> We have a [docs bot](https://huggingface.co/spaces/huggingchat/hf-docs-chat), and we highly encourage you to ask all your questions there. There is always a chance your bug can be fixed with a simple flag 👾🔫

Once you've confirmed the bug hasn't already been reported, please include the following information in your issue so we can quickly resolve it:

@@ -129,7 +132,7 @@ You will need basic `git` proficiency to contribute to
manual. Type `git --help` in a shell and enjoy! If you prefer books, [Pro
Git](https://git-scm.com/book/en/v2) is a very good reference.

You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main/setup.py#L426)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing:
You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main/setup.py#L449)** or above to contribute to 🤗 Transformers. Follow the steps below to start contributing:

1. Fork the [repository](https://github.com/huggingface/transformers) by
clicking on the **[Fork](https://github.com/huggingface/transformers/fork)** button on the repository's page. This creates a copy of the code
@@ -160,7 +163,7 @@ You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main
If 🤗 Transformers was already installed in the virtual environment, remove
it with `pip uninstall transformers` before reinstalling it in editable
mode with the `-e` flag.

Depending on your OS, and since the number of optional dependencies of Transformers is growing, you might get a
failure with this command. If that's the case make sure to install the Deep Learning framework you are working with
(PyTorch, TensorFlow and/or Flax) then do:
@@ -219,7 +222,7 @@ You'll need **[Python 3.8](https://github.com/huggingface/transformers/blob/main

If you're modifying documents under the `docs/source` directory, make sure the documentation can still be built. This check will also run in the CI when you open a pull request. To run a local check
make sure you install the documentation builder:

```bash
pip install ".[docs]"
```
@@ -338,12 +341,12 @@ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_ne
RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification
```

Like the slow tests, there are other environment variables available which not enabled by default during testing:
Like the slow tests, there are other environment variables available which are not enabled by default during testing:
- `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
- `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for PyTorch + Flax integration.
- `RUN_PT_TF_CROSS_TESTS`: Enables tests for TensorFlow + PyTorch integration.

More environment variables and additional information can be found in the [testing_utils.py](src/transformers/testing_utils.py).
More environment variables and additional information can be found in the [testing_utils.py](https://github.com/huggingface/transformers/blob/main/src/transformers/testing_utils.py).

🤗 Transformers uses `pytest` as a test runner only. It doesn't use any
`pytest`-specific features in the test suite itself.
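The opt-in variables listed in the CONTRIBUTING hunk above gate whole test suites that are skipped by default. A minimal sketch of how such a gate behaves (the helper name is invented for illustration and is not transformers' actual test code):

```python
# Minimal sketch of an opt-in test gate like RUN_CUSTOM_TOKENIZERS:
# the gated suite runs only when the variable is set to "yes".
import os

def custom_tokenizers_enabled(environ=None):
    """Return True when the opt-in variable is set to "yes"."""
    environ = os.environ if environ is None else environ
    return environ.get("RUN_CUSTOM_TOKENIZERS", "") == "yes"

print(custom_tokenizers_enabled({"RUN_CUSTOM_TOKENIZERS": "yes"}))  # True
print(custom_tokenizers_enabled({}))  # False
```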
1 change: 1 addition & 0 deletions Makefile
@@ -56,6 +56,7 @@ quality:
python utils/custom_init_isort.py --check_only
python utils/sort_auto_mappings.py --check_only
python utils/check_doc_toc.py
python utils/check_docstrings.py --check_all


# Format source code automatically and check is there are any problems left that need manual fixing
1 change: 1 addition & 0 deletions README.md
@@ -48,6 +48,7 @@ limitations under the License.
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_fr.md">Français</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_de.md">Deutsch</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_vi.md">Tiếng Việt</a> |
<a href="https://github.com/huggingface/transformers/blob/main/i18n/README_ar.md">العربية</a> |
</p>
</h4>

2 changes: 1 addition & 1 deletion benchmark/benchmark.py
@@ -101,7 +101,7 @@ def summarize(run_dir, metrics, expand_metrics=False):
# post-processing of report: show a few selected/important metric
for metric in metrics:
keys = metric.split(".")
value = report
value = report.to_dict()
current = metrics_values
for key in keys:
# Avoid KeyError when a user's specified metric has typo.
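The one-line benchmark fix above converts the report object to a plain dict before the dotted-key walk begins. A hedged sketch of the traversal that conversion enables (data and the helper name are illustrative, not the benchmark script's exact code):

```python
# Sketch of the dotted-key traversal in `summarize`: each metric name
# such as "forward.latency.mean" is split on "." and walked key by key,
# which only works on a plain mapping -- hence the `.to_dict()` call.
def lookup(report_dict, metric):
    value = report_dict
    for key in metric.split("."):
        value = value[key]  # a KeyError here would mean a typo in `metric`
    return value

report_dict = {"forward": {"latency": {"mean": 0.12}}}
print(lookup(report_dict, "forward.latency.mean"))  # 0.12
```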
11 changes: 6 additions & 5 deletions docker/consistency.dockerfile
@@ -2,14 +2,15 @@ FROM python:3.10-slim
ENV PYTHONDONTWRITEBYTECODE=1
USER root
ARG REF=main
RUN apt-get update && apt-get install -y time git pkg-config make git-lfs
RUN apt-get update && apt-get install -y time git g++ pkg-config make git-lfs
ENV UV_PYTHON=/usr/local/bin/python
RUN pip install uv && uv venv && uv pip install --no-cache-dir -U pip setuptools GitPython
RUN uv pip install --no-cache-dir --upgrade 'torch' --index-url https://download.pytorch.org/whl/cpu
RUN uv pip install --no-cache-dir tensorflow-cpu tf-keras
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,vision,testing]"
RUN pip install --no-cache-dir --upgrade 'torch' 'torchaudio' --index-url https://download.pytorch.org/whl/cpu
# tensorflow pin matching setup.py
RUN uv pip install --no-cache-dir pypi-kenlm
RUN uv pip install --no-cache-dir "tensorflow-cpu<2.16" "tf-keras<2.16"
RUN uv pip install --no-cache-dir "git+https://github.com/huggingface/transformers.git@${REF}#egg=transformers[flax,quality,testing,torch-speech,vision]"
RUN git lfs install

RUN pip uninstall -y transformers
RUN apt-get clean && rm -rf /var/lib/apt/lists/* && apt-get autoremove && apt-get autoclean

2 changes: 1 addition & 1 deletion docker/transformers-all-latest-gpu/Dockerfile
@@ -9,7 +9,7 @@ SHELL ["sh", "-lc"]
# The following `ARG` are mainly used to specify the versions explicitly & directly in this docker file, and not meant
# to be used as arguments for docker build (so far).

ARG PYTORCH='2.3.0'
ARG PYTORCH='2.4.0'
# (not always a valid torch version)
ARG INTEL_TORCH_EXT='2.3.0'
# Example: `cu102`, `cu113`, etc.
@@ -42,7 +42,7 @@ RUN python3 -m pip uninstall -y deepspeed
# This has to be run (again) inside the GPU VMs running the tests.
# The installation works here, but some tests fail, if we don't pre-build deepspeed again in the VMs running the tests.
# TODO: Find out why test fail.
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install "deepspeed<=0.14.0" --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1
RUN DS_BUILD_CPU_ADAM=1 DS_BUILD_FUSED_ADAM=1 python3 -m pip install deepspeed --global-option="build_ext" --global-option="-j8" --no-cache -v --disable-pip-version-check 2>&1

# When installing in editable mode, `transformers` is not recognized as a package.
# this line must be added in order for python to be aware of transformers.
2 changes: 1 addition & 1 deletion docker/transformers-pytorch-gpu/Dockerfile
@@ -11,7 +11,7 @@ ARG REF=main
RUN git clone https://github.com/huggingface/transformers && cd transformers && git checkout $REF

# If set to nothing, will install the latest version
ARG PYTORCH='2.3.0'
ARG PYTORCH='2.4.0'
ARG TORCH_VISION=''
ARG TORCH_AUDIO=''
# Example: `cu102`, `cu113`, etc.
2 changes: 1 addition & 1 deletion docs/TRANSLATING.md
@@ -54,4 +54,4 @@ The fields you should add are `local` (with the name of the file containing the

Once you have translated the `_toctree.yml` file, you can start translating the [MDX](https://mdxjs.com/) files associated with your docs chapter.

> 🙋 If you'd like others to help you with the translation, you should [open an issue](https://github.com/huggingface/transformers/issues) and tag @stevhliu and @MKhalusova.
> 🙋 If you'd like others to help you with the translation, you should [open an issue](https://github.com/huggingface/transformers/issues) and tag @stevhliu.