Merge pull request huggingface#5 from huggingface/main
jamesthesnake authored Feb 14, 2023
2 parents 8427edb + 762dda4 commit 0985bdd
Showing 1,810 changed files with 100,181 additions and 18,362 deletions.
23 changes: 20 additions & 3 deletions .circleci/config.yml
@@ -9,6 +9,19 @@ parameters:
default: false

jobs:
# Ensure running with CircleCI/huggingface
check_circleci_user:
docker:
- image: cimg/python:3.7.12
parallelism: 1
steps:
- run: echo $CIRCLE_PROJECT_USERNAME
- run: |
if [ "$CIRCLE_PROJECT_USERNAME" = "huggingface" ]; then
exit 0
else
echo "The CI is running under $CIRCLE_PROJECT_USERNAME personal account. Please follow https://support.circleci.com/hc/en-us/articles/360008097173-Troubleshooting-why-pull-requests-are-not-triggering-jobs-on-my-organization- to fix it."; exit -1
fi
# Fetch the tests to run
fetch_tests:
working_directory: ~/transformers
@@ -121,11 +134,10 @@ jobs:
command: pip freeze | tee installed.txt
- store_artifacts:
path: ~/transformers/installed.txt
- run: black --check --preview examples tests src utils
- run: isort --check-only examples tests src utils
- run: black --check examples tests src utils
- run: ruff examples tests src utils
- run: python utils/custom_init_isort.py --check_only
- run: python utils/sort_auto_mappings.py --check_only
- run: flake8 examples tests src utils
- run: doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
- run: python utils/check_doc_toc.py
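To reproduce this quality job locally, a rough sketch follows (not part of this diff); it assumes only that `black` and `ruff` are installed, versions are not pinned here so results may differ slightly from CI, and the `make quality` target updated later in this diff wraps the same checks.

```bash
# Sketch: run the same style checks as the updated check_code_quality job.
pip install black ruff
black --check examples tests src utils
ruff examples tests src utils
python utils/custom_init_isort.py --check_only
python utils/sort_auto_mappings.py --check_only
```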

@@ -161,23 +173,28 @@ jobs:
- run: python utils/check_repo.py
- run: python utils/check_inits.py
- run: python utils/check_config_docstrings.py
- run: python utils/check_config_attributes.py
- run: python utils/check_doctest_list.py
- run: make deps_table_check_updated
- run: python utils/tests_fetcher.py --sanity_check
- run: python utils/update_metadata.py --check-only
- run: python utils/check_task_guides.py

workflows:
version: 2
setup_and_quality:
when:
not: <<pipeline.parameters.nightly>>
jobs:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_tests

nightly:
when: <<pipeline.parameters.nightly>>
jobs:
- check_circleci_user
- check_code_quality
- check_repository_consistency
- fetch_all_tests
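The new `nightly` pipeline parameter decides which of the two workflows runs. As an illustration only (no such call is part of this commit, and the token is a placeholder), a nightly run could be triggered through the CircleCI v2 API by setting that parameter:

```bash
# Illustrative only: start a pipeline with the `nightly` parameter set to true.
# CIRCLE_TOKEN is a placeholder personal API token.
curl -X POST "https://circleci.com/api/v2/project/gh/huggingface/transformers/pipeline" \
  -H "Circle-Token: ${CIRCLE_TOKEN}" \
  -H "Content-Type: application/json" \
  -d '{"branch": "main", "parameters": {"nightly": true}}'
```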
66 changes: 62 additions & 4 deletions .circleci/create_circleci_config.py
@@ -15,7 +15,9 @@

import argparse
import copy
import glob
import os
import random
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

@@ -58,6 +60,8 @@ def __post_init__(self):
self.pytest_options = {}
if isinstance(self.tests_to_run, str):
self.tests_to_run = [self.tests_to_run]
if self.parallelism is None:
self.parallelism = 1

def to_dict(self):
job = {
@@ -99,10 +103,57 @@ def to_dict(self):
f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}"
)
test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
if self.tests_to_run is None:
test_command += " << pipeline.parameters.tests_to_run >>"
if self.parallelism == 1:
if self.tests_to_run is None:
test_command += " << pipeline.parameters.tests_to_run >>"
else:
test_command += " " + " ".join(self.tests_to_run)
else:
test_command += " " + " ".join(self.tests_to_run)
# We need explicit list instead of `pipeline.parameters.tests_to_run` (only available at job runtime)
tests = self.tests_to_run
if tests is None:
folder = os.environ["test_preparation_dir"]
test_file = os.path.join(folder, "filtered_test_list.txt")
if os.path.exists(test_file):
with open(test_file) as f:
tests = f.read().split(" ")

# expand the test list
if tests == ["tests"]:
tests = [os.path.join("tests", x) for x in os.listdir("tests")]
expanded_tests = []
for test in tests:
if test.endswith(".py"):
expanded_tests.append(test)
elif test == "tests/models":
expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
elif test == "tests/pipelines":
expanded_tests.extend([os.path.join(test, x) for x in os.listdir(test)])
else:
expanded_tests.append(test)
# Avoid long tests always being collected together
random.shuffle(expanded_tests)
tests = " ".join(expanded_tests)

# Each executor to run ~10 tests
n_executors = max(len(tests) // 10, 1)
# Avoid empty test list on some executor(s) or launching too many executors
if n_executors > self.parallelism:
n_executors = self.parallelism
job["parallelism"] = n_executors

# Need to be newline separated for the command `circleci tests split` below
command = f'echo {tests} | tr " " "\\n" >> tests.txt'
steps.append({"run": {"name": "Get tests", "command": command}})

command = 'TESTS=$(circleci tests split tests.txt) && echo $TESTS > splitted_tests.txt'
steps.append({"run": {"name": "Split tests", "command": command}})

steps.append({"store_artifacts": {"path": "~/transformers/tests.txt"}})
steps.append({"store_artifacts": {"path": "~/transformers/splitted_tests.txt"}})

test_command = f"python -m pytest -n {self.pytest_num_workers} " + " ".join(pytest_flags)
test_command += " $(cat splitted_tests.txt)"
if self.marker is not None:
test_command += f" -m {self.marker}"
test_command += " | tee tests_output.txt"
@@ -156,6 +207,7 @@ def job_name(self):
"pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
"pip install git+https://github.com/huggingface/accelerate",
],
parallelism=1,
pytest_num_workers=3,
)

@@ -168,6 +220,7 @@ def job_name(self):
"pip install .[sklearn,tf-cpu,testing,sentencepiece,tf-speech,vision]",
"pip install tensorflow_probability",
],
parallelism=1,
pytest_options={"rA": None},
)

@@ -179,6 +232,7 @@ def job_name(self):
"pip install --upgrade pip",
"pip install .[flax,testing,sentencepiece,flax-speech,vision]",
],
parallelism=1,
pytest_options={"rA": None},
)

@@ -305,6 +359,7 @@ def job_name(self):
"pip install --upgrade pip",
"pip install .[torch,testing,vision]",
"pip install torchvision",
"pip install scipy",
"pip install 'git+https://github.com/facebookresearch/detectron2.git'",
"sudo apt install tesseract-ocr",
"pip install pytesseract",
@@ -313,6 +368,7 @@ def job_name(self):
tests_to_run=[
"tests/models/*layoutlmv*",
"tests/models/*nat",
"tests/models/deta",
],
pytest_num_workers=1,
pytest_options={"durations": 100},
@@ -356,6 +412,8 @@ def job_name(self):
def create_circleci_config(folder=None):
if folder is None:
folder = os.getcwd()
# Used in CircleCIJob.to_dict() to expand the test list (for using parallelism)
os.environ["test_preparation_dir"] = folder
jobs = []
all_test_file = os.path.join(folder, "test_list.txt")
if os.path.exists(all_test_file):
@@ -378,7 +436,7 @@ def create_circleci_config(folder=None):
example_file = os.path.join(folder, "examples_test_list.txt")
if os.path.exists(example_file) and os.path.getsize(example_file) > 0:
jobs.extend(EXAMPLES_TESTS)

repo_util_file = os.path.join(folder, "test_repo_utils.txt")
if os.path.exists(repo_util_file) and os.path.getsize(repo_util_file) > 0:
jobs.extend(REPO_UTIL_TESTS)
8 changes: 5 additions & 3 deletions .github/ISSUE_TEMPLATE/bug-report.yml
@@ -26,8 +26,9 @@ body:
Models:
- text models: @ArthurZucker and @younesbelkada
- vision models: @amyeroberts and @NielsRogge
- vision models: @amyeroberts
- speech models: @sanchit-gandhi
- graph models: @clefourrier
Library:
@@ -40,10 +41,11 @@ body:
Integrations:
- deepspeed: @stas00
- deepspeed: HF Trainer: @stas00, Accelerate: @pacman100
- ray/raytune: @richardliaw, @amogkam
- Big Model Inference: @sgugger @muellerzr
Documentation: @sgugger and @stevhliu
Documentation: @sgugger, @stevhliu and @MKhalusova
Model hub:
7 changes: 4 additions & 3 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -40,8 +40,9 @@ members/contributors who may be interested in your PR.
Models:
- text models: @ArthurZucker and @younesbelkada
- vision models: @amyeroberts and @NielsRogge
- vision models: @amyeroberts
- speech models: @sanchit-gandhi
- graph models: @clefourrier
Library:
@@ -54,10 +55,10 @@ Library:
Integrations:
- deepspeed: @stas00
- deepspeed: HF Trainer: @stas00, Accelerate: @pacman100
- ray/raytune: @richardliaw, @amogkam
Documentation: @sgugger and @stevhliu
Documentation: @sgugger, @stevhliu and @MKhalusova
HF projects:
3 changes: 0 additions & 3 deletions .github/workflows/build-docker-images.yml
@@ -115,9 +115,6 @@ jobs:
# Can't build 2 images in a single job `latest-torch-deepspeed-docker` (for `nvcr.io/nvidia`)
latest-torch-deepspeed-docker-for-push-ci-daily-build:
name: "Latest PyTorch + DeepSpeed (Push CI - Daily Build)"
# Can't run in parallel, otherwise get an error:
# `Error response from daemon: Get "https://registry-1.docker.io/v2/": received unexpected HTTP status: 503 Service Unavailable`
needs: latest-torch-deepspeed-docker
runs-on: ubuntu-latest
steps:
-
2 changes: 1 addition & 1 deletion .github/workflows/build_documentation.yml
@@ -15,6 +15,6 @@ jobs:
commit_sha: ${{ github.sha }}
package: transformers
notebook_folder: transformers_doc
languages: de en es it ko pt zh
languages: de en es fr it ko pt zh
secrets:
token: ${{ secrets.HUGGINGFACE_PUSH }}
2 changes: 1 addition & 1 deletion .github/workflows/build_pr_documentation.yml
@@ -14,4 +14,4 @@ jobs:
commit_sha: ${{ github.event.pull_request.head.sha }}
pr_number: ${{ github.event.number }}
package: transformers
languages: de en es it ko pt zh
languages: de en es fr it ko pt zh
5 changes: 4 additions & 1 deletion .gitignore
@@ -163,4 +163,7 @@ tags
*.lock

# DS_Store (MacOS)
.DS_Store
.DS_Store

# ruff
.ruff_cache
11 changes: 8 additions & 3 deletions CONTRIBUTING.md
@@ -182,7 +182,7 @@ You'll need **[Python 3.7]((https://github.com/huggingface/transformers/blob/mai
For more information about tests, check out the
[Testing](https://huggingface.co/docs/transformers/testing) guide.

🤗 Transformers relies on `black` and `isort` to format its source code
🤗 Transformers relies on `black` and `ruff` to format its source code
consistently. After you make changes, apply automatic style corrections and code verifications
that can't be automated in one go with:

@@ -199,7 +199,7 @@ You'll need **[Python 3.7]((https://github.com/huggingface/transformers/blob/mai
$ make style
```

🤗 Transformers also uses `flake8` and a few custom scripts to check for coding mistakes. Quality
🤗 Transformers also uses `ruff` and a few custom scripts to check for coding mistakes. Quality
controls are run by the CI, but you can run the same checks with:

```bash
@@ -337,7 +337,12 @@ $ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./tests/models/my_
$ RUN_SLOW=yes python -m pytest -n auto --dist=loadfile -s -v ./examples/pytorch/text-classification
```

Like the slow tests, custom tokenizer tests are skipped but you can set the `RUN_CUSTOM_TOKENIZERS` environment variable to `yes` to run them.
Like the slow tests, there are other environment variables available which are not enabled by default during testing:
- `RUN_CUSTOM_TOKENIZERS`: Enables tests for custom tokenizers.
- `RUN_PT_FLAX_CROSS_TESTS`: Enables tests for PyTorch + Flax integration.
- `RUN_PT_TF_CROSS_TESTS`: Enables tests for TensorFlow + PyTorch integration.

More environment variables and additional information can be found in [testing_utils.py](src/transformers/testing_utils.py).
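For example (an illustrative invocation, not part of this diff; the test paths are just samples):

```bash
# Illustrative paths; any test module works.
$ RUN_CUSTOM_TOKENIZERS=yes python -m pytest -sv ./tests/models/bert_japanese
$ RUN_PT_TF_CROSS_TESTS=yes python -m pytest -sv ./tests/models/vit
```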

🤗 Transformers uses `pytest` as a test runner only. It doesn't use any
`pytest`-specific features in the test suite itself.
18 changes: 10 additions & 8 deletions Makefile
@@ -9,9 +9,8 @@ modified_only_fixup:
$(eval modified_py_files := $(shell python utils/get_modified_files.py $(check_dirs)))
@if test -n "$(modified_py_files)"; then \
echo "Checking/fixing $(modified_py_files)"; \
black --preview $(modified_py_files); \
isort $(modified_py_files); \
flake8 $(modified_py_files); \
black $(modified_py_files); \
ruff $(modified_py_files) --fix; \
else \
echo "No library .py files were modified"; \
fi
@@ -40,17 +39,19 @@ repo-consistency:
python utils/check_repo.py
python utils/check_inits.py
python utils/check_config_docstrings.py
python utils/check_config_attributes.py
python utils/check_doctest_list.py
python utils/tests_fetcher.py --sanity_check
python utils/update_metadata.py --check-only
python utils/check_task_guides.py

# this target runs checks on all files

quality:
black --check --preview $(check_dirs)
isort --check-only $(check_dirs)
black --check $(check_dirs)
python utils/custom_init_isort.py --check_only
python utils/sort_auto_mappings.py --check_only
flake8 $(check_dirs)
ruff $(check_dirs)
doc-builder style src/transformers docs/source --max_len 119 --check_only --path_to_docs docs/source
python utils/check_doc_toc.py

@@ -65,8 +66,8 @@ extra_style_checks:
# this target runs checks on all files and potentially modifies some of them

style:
black --preview $(check_dirs)
isort $(check_dirs)
black $(check_dirs)
ruff $(check_dirs) --fix
${MAKE} autogenerate_code
${MAKE} extra_style_checks

@@ -80,6 +81,7 @@ fix-copies:
python utils/check_copies.py --fix_and_overwrite
python utils/check_table.py --fix_and_overwrite
python utils/check_dummies.py --fix_and_overwrite
python utils/check_task_guides.py --fix_and_overwrite

# Run tests for the library

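Taken together, a typical local workflow with the Makefile targets touched here looks like the sketch below (not part of this diff; the comments summarize what each target now does):

```bash
make style             # format with black and apply ruff --fix
make quality           # check-only: black, ruff, custom scripts, doc style
make repo-consistency  # run the utils/check_*.py consistency checks
make fix-copies        # regenerate copies, tables, dummies and task guides
```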