Merge pull request huggingface#3 from huggingface/main

pop
jameshennessytempus · Dec 15, 2022 · 8427edb · 8427edb
2 parents 0d831d9 + 1543cee
commit 8427edb
Show file tree

Hide file tree

Showing 804 changed files with 57,606 additions and 12,777 deletions.
diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
@@ -25,7 +25,6 @@
 COMMON_ENV_VARIABLES = {"OMP_NUM_THREADS": 1, "TRANSFORMERS_IS_CI": True, "PYTEST_TIMEOUT": 120}
 COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "dist": "loadfile", "s": None}
 DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.7.12"}]
-TORCH_SCATTER_INSTALL = "pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.12.0+cpu.html"
 
 
 @dataclass
@@ -127,7 +126,6 @@ def job_name(self):
         "git lfs install",
         "pip install --upgrade pip",
         "pip install .[sklearn,tf-cpu,torch,testing,sentencepiece,torch-speech,vision]",
-        TORCH_SCATTER_INSTALL,
         "pip install tensorflow_probability",
         "pip install git+https://github.com/huggingface/accelerate",
     ],
@@ -143,7 +141,6 @@ def job_name(self):
         "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
         "pip install --upgrade pip",
         "pip install .[sklearn,flax,torch,testing,sentencepiece,torch-speech,vision]",
-        TORCH_SCATTER_INSTALL,
         "pip install git+https://github.com/huggingface/accelerate",
     ],
     marker="is_pt_flax_cross_test",
@@ -157,7 +154,6 @@ def job_name(self):
         "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng time",
         "pip install --upgrade pip",
         "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
-        TORCH_SCATTER_INSTALL,
         "pip install git+https://github.com/huggingface/accelerate",
     ],
     pytest_num_workers=3,
@@ -192,8 +188,7 @@ def job_name(self):
     install_steps=[
         "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev espeak-ng",
         "pip install --upgrade pip",
-        "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm]",
-        TORCH_SCATTER_INSTALL,
+        "pip install .[sklearn,torch,testing,sentencepiece,torch-speech,vision,timm,video]",
     ],
     pytest_options={"rA": None},
     tests_to_run="tests/pipelines/"
@@ -204,7 +199,7 @@ def job_name(self):
     "pipelines_tf",
     install_steps=[
         "pip install --upgrade pip",
-        "pip install .[sklearn,tf-cpu,testing,sentencepiece]",
+        "pip install .[sklearn,tf-cpu,testing,sentencepiece,vision]",
         "pip install tensorflow_probability",
     ],
     pytest_options={"rA": None},
@@ -303,8 +298,8 @@ def job_name(self):
 )
 
 
-layoutlm_job = CircleCIJob(
-    "layoutlmv2_and_v3",
+exotic_models_job = CircleCIJob(
+    "exotic_models",
     install_steps=[
         "sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev",
         "pip install --upgrade pip",
@@ -313,8 +308,12 @@ def job_name(self):
         "pip install 'git+https://github.com/facebookresearch/detectron2.git'",
         "sudo apt install tesseract-ocr",
         "pip install pytesseract",
+        "pip install natten",
+    ],
+    tests_to_run=[
+        "tests/models/*layoutlmv*",
+        "tests/models/*nat",
     ],
-    tests_to_run="tests/models/*layoutlmv*",
     pytest_num_workers=1,
     pytest_options={"durations": 100},
 )
@@ -324,7 +323,7 @@ def job_name(self):
     "repo_utils",
     install_steps=[
         "pip install --upgrade pip",
-        "pip install .[all,quality,testing]",
+        "pip install .[quality,testing]",
     ],
     parallelism=None,
     pytest_num_workers=1,
@@ -341,7 +340,7 @@ def job_name(self):
     custom_tokenizers_job,
     hub_job,
     onnx_job,
-    layoutlm_job,
+    exotic_models_job,
 ]
 EXAMPLES_TESTS = [
     examples_torch_job,
@@ -386,7 +385,11 @@ def create_circleci_config(folder=None):
 
     if len(jobs) > 0:
         config = {"version": "2.1"}
-        config["parameters"] = {"tests_to_run": {"type": "string", "default": test_list}}
+        config["parameters"] = {
+            # Only used to accept the parameters from the trigger
+            "nightly": {"type": "boolean", "default": False},
+            "tests_to_run": {"type": "string", "default": test_list},
+        }
         config["jobs"] = {j.job_name: j.to_dict() for j in jobs}
         config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}}
         with open(os.path.join(folder, "generated_config.yml"), "w") as f:

diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -17,58 +17,53 @@ body:
       description: |
         Your issue will be replied to more quickly if you can figure out the right person to tag with @
         If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**.
+        
+        All issues are read by one of the core maintainers, so if you don't know who to tag, just leave this blank and
+        a core maintainer will ping the right person.
+        
         Please tag fewer than 3 people.
         
         Models:
 
-          - ALBERT, BERT, XLM, DeBERTa, DeBERTa-v2, ELECTRA, MobileBert, SqueezeBert: `@LysandreJik`
-          - T5, Pegasus, EncoderDecoder: `@patrickvonplaten`
-          - Blenderbot, MBART, BART, Marian, Pegasus: `@patil-suraj`
-          - Reformer, TransfoXL, XLNet, FNet: `@patrickvonplaten`
-          - Longformer, BigBird: `@ydshieh`
-          - FSMT: `@stas00`
-          - Funnel: `@sgugger`
-          - GPT-2, GPT: `@patil-suraj`, `@patrickvonplaten`, `@LysandreJik`
-          - RAG, DPR: `@patrickvonplaten`, `@lhoestq`
-          - TensorFlow: `@Rocketknight1`
-          - JAX/Flax: `@patil-suraj`
-          - TAPAS, LayoutLM, LayoutLMv2, LUKE, ViT, BEiT, DEiT, DETR, CANINE: `@NielsRogge`
-          - GPT-Neo, GPT-J, CLIP: `@patil-suraj`
-          - Wav2Vec2, HuBERT, UniSpeech, UniSpeechSAT, SEW, SEW-D: `@patrickvonplaten`, `@anton-l`
-          - SpeechEncoderDecoder, Speech2Text, Speech2Text2: `@sanchit-gandhi`, `@patrickvonplaten`, `@anton-l`
-          
-          If the model isn't in the list, ping `@LysandreJik` who will redirect you to the correct contributor.
-
+          - text models: @ArthurZucker and @younesbelkada
+          - vision models: @amyeroberts and @NielsRogge
+          - speech models: @sanchit-gandhi
+        
         Library:
-          - Benchmarks: `@patrickvonplaten`
-          - Deepspeed: `@stas00`
-          - Ray/raytune: `@richardliaw`, `@amogkam`
-          - Text generation: `@patrickvonplaten`, `@Narsil`, `@gante`
-          - Tokenizers: `@SaulLu`
-          - Trainer: `@sgugger`
-          - Pipelines: `@Narsil`
-          - Speech: `@patrickvonplaten`, `@anton-l`, `@sanchit-gandhi`
-          - Vision: `@NielsRogge`, `@sgugger`
-
-        Documentation: `@sgugger`, `@stevhliu`
-
+        
+          - flax: @sanchit-gandhi
+          - generate: @gante
+          - pipelines: @Narsil
+          - tensorflow: @gante and @Rocketknight1
+          - tokenizers: @ArthurZucker
+          - trainer: @sgugger
+        
+        Integrations:
+        
+          - deepspeed: @stas00
+          - ray/raytune: @richardliaw, @amogkam
+        
+        Documentation: @sgugger and @stevhliu
+        
         Model hub:
 
           - for issues with a model, report at https://discuss.huggingface.co/ and tag the model's creator.
-
+        
         HF projects:
-
+        
+          - accelerate: [different repo](https://github.com/huggingface/accelerate)
           - datasets: [different repo](https://github.com/huggingface/datasets)
+          - diffusers: [different repo](https://github.com/huggingface/diffusers)
           - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
+        
+        Maintained examples (not research project or legacy):
+        
+          - Flax: @sanchit-gandhi
+          - PyTorch: @sgugger
+          - TensorFlow: @Rocketknight1
 
-        Examples:
-
-          - maintained examples (not research project or legacy): `@sgugger`, `@patil-suraj`
-
-        For research projetcs, please ping the contributor directly. For example, on the following projects:
+        Research projects are not maintained and should be taken as is.
 
-          - research_projects/bert-loses-patience: `@JetRunner`
-          - research_projects/distillation: `@VictorSanh`
       placeholder: "@Username ..."
 
   - type: checkboxes

diff --git a/.github/ISSUE_TEMPLATE/i18n.md b/.github/ISSUE_TEMPLATE/i18n.md
@@ -0,0 +1,46 @@
+---
+name: 🌐 Translating a new language?
+about: Start a new translation effort in your language
+title: '[i18n-<languageCode>] Translating docs to <languageName>'
+labels: WIP
+assignees: ''
+
+---
+
+<!--
+Note: Please search to see if an issue already exists for the language you are trying to translate.
+-->
+
+Hi!
+
+Let's bring the documentation to all the <languageName>-speaking community 🌐 (currently 0 out of 267 complete)
+
+Who would want to translate? Please follow the 🤗 [TRANSLATING guide](https://github.com/huggingface/transformers/blob/main/docs/TRANSLATING.md). Here is a list of the files ready for translation. Let us know in this issue if you'd like to translate any, and we'll add your name to the list.
+
+Some notes:
+
+* Please translate using an informal tone (imagine you are talking with a friend about transformers 🤗).
+* Please translate in a gender-neutral way.
+* Add your translations to the folder called `<languageCode>` inside the [source folder](https://github.com/huggingface/transformers/tree/main/docs/source).
+* Register your translation in `<languageCode>/_toctree.yml`; please follow the order of the [English version](https://github.com/huggingface/transformers/blob/main/docs/source/en/_toctree.yml).
+* Once you're finished, open a pull request and tag this issue by including #issue-number in the description, where issue-number is the number of this issue. Please ping @ArthurZucker, @sgugger for review.
+* 🙋 If you'd like others to help you with the translation, you can also post in the 🤗 [forums](https://discuss.huggingface.co/).
+
+## Get Started section
+
+- [ ] [index.mdx](https://github.com/huggingface/transformers/blob/main/docs/source/en/index.mdx) https://github.com/huggingface/transformers/pull/20180
+- [ ] [quicktour.mdx](https://github.com/huggingface/transformers/blob/main/docs/source/en/quicktour.mdx) (waiting for initial PR to go through)
+- [ ] [installation.mdx](https://github.com/huggingface/transformers/blob/main/docs/source/en/installation.mdx).
+
+## Tutorial section
+- [ ] [pipeline_tutorial.mdx](https://github.com/huggingface/transformers/blob/main/docs/source/en/pipeline_tutorial.mdx)
+- [ ]  [autoclass_tutorial.mdx](https://github.com/huggingface/transformers/blob/master/docs/source/autoclass_tutorial.mdx)
+- [ ]  [preprocessing.mdx](https://github.com/huggingface/transformers/blob/main/docs/source/en/preprocessing.mdx)
+- [ ]  [training.mdx](https://github.com/huggingface/transformers/blob/main/docs/source/en/training.mdx)
+- [ ]  [accelerate.mdx](https://github.com/huggingface/transformers/blob/main/docs/source/en/accelerate.mdx)
+- [ ]  [model_sharing.mdx](https://github.com/huggingface/transformers/blob/main/docs/source/en/model_sharing.mdx)
+- [ ]  [multilingual.mdx](https://github.com/huggingface/transformers/blob/main/docs/source/en/multilingual.mdx)
+
+<!--
+Keep on adding more as you go 🔥
+-->
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -39,36 +39,37 @@ members/contributors who may be interested in your PR.
 
 Models:
 
-- albert, bert, xlm: @LysandreJik
-- blenderbot, bart, marian, pegasus, encoderdecoder,  t5: @patrickvonplaten, @patil-suraj
-- longformer, reformer, transfoxl, xlnet: @patrickvonplaten
-- fsmt: @stas00
-- funnel: @sgugger
-- gpt2: @patrickvonplaten, @LysandreJik
-- rag: @patrickvonplaten, @lhoestq
-- tensorflow: @LysandreJik
+- text models: @ArthurZucker and @younesbelkada
+- vision models: @amyeroberts and @NielsRogge
+- speech models: @sanchit-gandhi
 
 Library:
 
-- benchmarks: @patrickvonplaten
+- flax: @sanchit-gandhi
+- generate: @gante
+- pipelines: @Narsil
+- tensorflow: @gante and @Rocketknight1
+- tokenizers: @ArthurZucker
+- trainer: @sgugger
+
+Integrations:
+
 - deepspeed: @stas00
 - ray/raytune: @richardliaw, @amogkam
-- text generation: @patrickvonplaten
-- tokenizers: @n1t0, @LysandreJik
-- trainer: @sgugger
-- pipelines: @LysandreJik
 
-Documentation: @sgugger
+Documentation: @sgugger and @stevhliu
 
 HF projects:
 
+- accelerate: [different repo](https://github.com/huggingface/accelerate)
 - datasets: [different repo](https://github.com/huggingface/datasets)
+- diffusers: [different repo](https://github.com/huggingface/diffusers)
 - rust tokenizers: [different repo](https://github.com/huggingface/tokenizers)
 
-Examples:
+Maintained examples (not research project or legacy):
 
-- maintained examples (not research project or legacy): @sgugger, @patil-suraj
-- research_projects/bert-loses-patience: @JetRunner
-- research_projects/distillation: @VictorSanh
+- Flax: @sanchit-gandhi
+- PyTorch: @sgugger
+- TensorFlow: @Rocketknight1
 
  -->
diff --git a/.github/workflows/add-model-like.yml b/.github/workflows/add-model-like.yml
@@ -16,7 +16,7 @@ jobs:
     name: "Add new model like template tests"
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
 
       - name: Install dependencies
         run: |
@@ -74,7 +74,7 @@ jobs:
 
       - name: Test suite reports artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v3
         with:
           name: run_all_tests_new_models_test_reports
           path: reports/tests_new_models