Try 1.6 #1

Open · wants to merge 166 commits into base: master

Commits (166)
f94a52c
[s2s] add BartTranslationDistiller for distilling mBART (#6363)
sshleifer Aug 12, 2020
fd3de20
Get GKE logs via kubectl logs instead of gcloud logging read. (#6446)
zcain117 Aug 12, 2020
0735def
[EncoderDecoder] Add encoder-decoder for roberta/ vanilla longformer …
patrickvonplaten Aug 12, 2020
bc82047
add targets arg to fill-mask pipeline (#6239)
joeddav Aug 12, 2020
e983da0
cleanup tf unittests: part 2 (#6260)
stas00 Aug 13, 2020
0ed7c00
Update README.md (#6435)
cedspam Aug 13, 2020
9d94aec
Fix docs and bad word tokens generation_utils.py (#6387)
ZhuBaohe Aug 13, 2020
54c687e
typo fix (#6462)
prajjwal1 Aug 13, 2020
f7cbc13
Test model outputs equivalence (#6445)
LysandreJik Aug 13, 2020
a442f87
add LongformerTokenizerFast in AutoTokenizer (#6463)
patil-suraj Aug 13, 2020
f51161e
add BartTokenizerFast in AutoTokenizer (#6464)
patil-suraj Aug 13, 2020
eda07ef
Add POS tagging and Phrase chunking token classification examples (#6…
vblagoje Aug 13, 2020
e92efcf
Mult rouge by 100: standard units (#6359)
sshleifer Aug 13, 2020
7bc0056
Clean directory after script testing (#6453)
JetRunner Aug 13, 2020
05810cd
Fix typo (#6469)
mrm8488 Aug 13, 2020
680f133
MBartForConditionalGeneration (#6441)
patil-suraj Aug 14, 2020
eb613b5
Use hash to clean the test dirs (#6475)
JetRunner Aug 14, 2020
1d6e71e
[EncoderDecoder] Add Cross Attention for GPT2 (#6415)
patrickvonplaten Aug 14, 2020
9a8c168
Sort unique_no_split_tokens to make it deterministic (#6461)
lhoestq Aug 14, 2020
b5ba758
Import accuracy_score (#6480)
gijswijnholds Aug 14, 2020
895ed8f
Generation doc (#6470)
sgugger Aug 14, 2020
24107c2
Fix TPU Convergence bug introduced by PR#6151 (#6488)
jysohn23 Aug 14, 2020
fe61c05
Add examples/bert-loses-patience who can help (#6499)
JetRunner Aug 16, 2020
2060181
Fixes paths with spaces in seq2seq example (#6493)
KylePiira Aug 16, 2020
72add6c
[s2s] docs, document desired filenames nicely (#6525)
sshleifer Aug 17, 2020
84c265f
[lightning_base] fix s2s logging, only make train_loader once (#6404)
sshleifer Aug 17, 2020
6d38ab1
Update bert-base-portuguese-cased and bert-large-portuguese-cased mod…
fabiocapsouza Aug 17, 2020
df15c7c
typos (#6505)
stas00 Aug 17, 2020
3c72f55
Add Model Card for electra-base-german-uncased (#6496)
PhilipMay Aug 17, 2020
423eb5b
[doc] fix invalid env vars (#6504)
stas00 Aug 17, 2020
48c6c61
Support additional dictionaries for BERT Japanese tokenizers (#6515)
singletongue Aug 17, 2020
72911c8
Create model cards for indonesian models (#6522)
cahya-wirawan Aug 17, 2020
49d8076
[doc] Summary of the models fixes (#6511)
stas00 Aug 17, 2020
37709b5
Remove deprecated assertEquals (#6532)
JetRunner Aug 17, 2020
36010cb
fix pegasus doc (#6533)
patrickvonplaten Aug 17, 2020
9dbe409
[testing] a new TestCasePlus subclass + get_auto_remove_tmp_dir() (#…
stas00 Aug 17, 2020
39c3b1d
[sched] polynomial_decay_schedule use default power=1.0 (#6473)
stas00 Aug 17, 2020
b41cc0b
Fix flaky ONNX tests (#6531)
mfuntowicz Aug 17, 2020
d2da2cb
allow spaces in bash args with "$@" (#6521)
sshleifer Aug 17, 2020
d0c2389
add custom datasets tutorial (#6466)
joeddav Aug 17, 2020
84d3331
[doc] make the text more readable, fix some typos, add some disambigu…
stas00 Aug 17, 2020
2a77813
[BartTokenizer] add prepare s2s batch (#6212)
patil-suraj Aug 17, 2020
b732e7e
[doc] multiple corrections to "Summary of the tasks" (#6509)
stas00 Aug 17, 2020
7ca6ab6
Fix CI
sgugger Aug 17, 2020
f68c873
replace _ with __ rst links (#6541)
stas00 Aug 17, 2020
c9564f5
[Doc] add more MBart and other doc (#6490)
patil-suraj Aug 17, 2020
407da12
[T5Tokenizer] add prepare_seq2seq_batch method (#6122)
patil-suraj Aug 17, 2020
07971d8
[model_cards] Fix yaml for cedpsam/chatbot_fr
julien-c Aug 17, 2020
3a30290
[model_cards] Add a new model for Irish (#6544)
jimregan Aug 17, 2020
98ee802
[model_cards] Add model cards for Urduhack model (roberta-urdu-small)…
akkefa Aug 17, 2020
6314470
Added first model card (#6530)
onepointconsulting Aug 17, 2020
fb7330b
update with #s of sentences/tokens (#6546)
jimregan Aug 17, 2020
12d7624
[marian] converter supports models from new Tatoeba project (#6342)
sshleifer Aug 18, 2020
5a81195
Fixed label datatype for STS-B (#6492)
amodaresi Aug 18, 2020
1fdf372
Small typo fixes for model card: electra-base-german-uncased (#6555)
PhilipMay Aug 18, 2020
cfa26d2
github: add @stefan-it to bug-report template for all token-classific…
stefan-it Aug 18, 2020
7659a8e
fix incorrect codecov reports (#6553)
stas00 Aug 18, 2020
5a5af22
[docs] Fix wrong newline in the middle of a paragraph (#6573)
romainr Aug 18, 2020
7516bcf
[docs] Fix number of 'ug' occurrences in tokenizer_summary (#6574)
romainr Aug 18, 2020
1dfce0f
Create README.md (#6557)
mrm8488 Aug 18, 2020
aaab9ab
Create README.md (#6556)
mrm8488 Aug 18, 2020
fb6844a
[Pegasus Doc] minor typo (#6579)
patil-suraj Aug 18, 2020
6f972e1
update xnli-mt url (#6580)
patil-suraj Aug 18, 2020
974bb4a
[Model card] Bert2GPT2 EncoderDecoder model (#6569)
patrickvonplaten Aug 18, 2020
1529bf9
add BartConfig.force_bos_token_to_be_generated (#6526)
sshleifer Aug 18, 2020
ab42d74
Fix bart base test (#6587)
sshleifer Aug 19, 2020
fe0b85e
[EncoderDecoder] Add functionality to tie encoder decoder weights (#6…
patrickvonplaten Aug 19, 2020
2a7402c
Feed forward chunking others (#6365)
Pradhy729 Aug 19, 2020
9a86321
tf generation utils: remove unused kwargs (#6591)
sshleifer Aug 19, 2020
8bcceac
fix model outputs test (#6593)
patrickvonplaten Aug 19, 2020
7581884
[BartTokenizerFast] add prepare_seq2seq_batch (#6543)
patil-suraj Aug 19, 2020
18ca0e9
Fix #6575 (#6596)
sgugger Aug 19, 2020
93c5c9a
[cleanup] remove confusing newline (#6603)
orena1 Aug 20, 2020
3c3c46f
Typo fix in 04-onnx-export (#6595)
SidJain1412 Aug 20, 2020
61b5ee1
lighter 'make test' (#6512)
stas00 Aug 20, 2020
cabfdfa
Docs copy button misses ... prefixed code (#6518)
romainr Aug 20, 2020
33bf426
removed redundant arg in prepare_inputs (#6614)
prajjwal1 Aug 20, 2020
b3e5469
Fix CI
sgugger Aug 20, 2020
039d8d6
add intro to nlp lib & dataset links to custom datasets tutorial (#6583)
joeddav Aug 20, 2020
573bdb0
Add tests to Trainer (#6605)
sgugger Aug 20, 2020
f9d280a
TFTrainer dataset doc & fix evaluation bug (#6618)
joeddav Aug 20, 2020
c945450
Add tests for Reformer tokenizer (#6485)
D-Roberts Aug 20, 2020
505f2d7
[Tests] fix attention masks in Tests (#6621)
patrickvonplaten Aug 20, 2020
9539583
XLNet Bug when training with apex 16-bit precision (#6567)
johndolgov Aug 20, 2020
e8af90c
Move threshold up for flaky test with Electra (#6622)
sgugger Aug 20, 2020
86c07e6
One last threshold to raise
sgugger Aug 20, 2020
5bf4465
Regression test for pegasus bugfix (#6606)
sshleifer Aug 20, 2020
e5f4522
Trainer automatically drops unused columns in nlp datasets (#6449)
sgugger Aug 20, 2020
b105f2c
Update ONNX doc to match the removal of --optimize argument.
mfuntowicz Aug 21, 2020
efc7460
model card for Spanish electra base (#6633)
mrm8488 Aug 21, 2020
bdf7e5d
Remove accidental comment (#6629)
josephrocca Aug 21, 2020
d0e42a7
CamembertForCausalLM (#6577)
patil-suraj Aug 21, 2020
a4db4e3
[Docs model summaries] Add pegasus to docs (#6640)
patrickvonplaten Aug 21, 2020
9e8c494
Add T5-11B disclaimer
patrickvonplaten Aug 21, 2020
cbda729
[Doc model summary] add MBart model summary (#6649)
patil-suraj Aug 21, 2020
0f94151
Add model card for electricidad-base-generator (#6650)
mrm8488 Aug 21, 2020
97bb249
Correct bug in bert2bert-cnn_dailymail
patrickvonplaten Aug 22, 2020
068df74
added model_card for model codeswitch-hineng-lid-lince and codeswitch…
sagorbrur Aug 22, 2020
f235ee2
Add Roberta2Roberta model card
patrickvonplaten Aug 23, 2020
f230a64
new paper bibtex (#6656)
sshleifer Aug 23, 2020
16e3894
Add Roberta2Roberta shared
patrickvonplaten Aug 23, 2020
a622705
added multiple model_cards for below models (#6666)
sagorbrur Aug 24, 2020
1a779ad
Specify config filename (#6626)
jarednielsen Aug 24, 2020
b30879f
Don't reset the dataset type + plug for rm unused columns (#6683)
sgugger Aug 24, 2020
0a850d2
Missing commit
sgugger Aug 24, 2020
d329c9b
Fixed DataCollatorForLanguageModeling not accepting lists of lists (#…
TevenLeScao Aug 24, 2020
a573777
Update repo to isort v5 (#6686)
sgugger Aug 24, 2020
dd522da
Fix PL token classification examples (#6682)
vblagoje Aug 24, 2020
3a7fdd3
Add hyperparameter search to Trainer (#6576)
sgugger Aug 24, 2020
8f98faf
Lat fix for Ray HP search (#6691)
sgugger Aug 24, 2020
a8d6716
Create PULL_REQUEST_TEMPLATE.md (#6660)
stas00 Aug 24, 2020
912a21e
remove BartForConditionalGeneration.generate (#6659)
stas00 Aug 24, 2020
6b4c617
Move unused args to kwargs (#6694)
sgugger Aug 24, 2020
0ebc969
[fixdoc] Add import to pegasus usage doc (#6698)
sshleifer Aug 24, 2020
d20cbb8
Fix hyperparameter_search doc (#6695)
sgugger Aug 25, 2020
b6512d2
Add model card for singbert. (#6674)
zyuanlim Aug 25, 2020
0344428
[s2s] round bleu, rouge to 4 digits (#6704)
sshleifer Aug 25, 2020
4fca874
Remove hard-coded uses of float32 to fix mixed precision use (#6648)
schmidek Aug 25, 2020
0f16dd0
Add DPR to models summary (#6690)
lhoestq Aug 25, 2020
841f071
Add typing.overload for convert_ids_tokens (#6637)
tamuhey Aug 25, 2020
4db2fa7
Allow tests in examples to use cuda or fp16,if they are available (#5…
Joel-hanson Aug 25, 2020
a99d09c
add new line to make examples run (#6706)
sshleifer Aug 25, 2020
f5bad03
Use generators tqdm progressbars (#6696)
sgugger Aug 25, 2020
abc0202
More tests to Trainer (#6699)
sgugger Aug 25, 2020
124c3d6
Add tokenizer to Trainer (#6689)
sgugger Aug 25, 2020
625318f
tensor.nonzero() is deprecated in PyTorch 1.6 (#6715)
mfuntowicz Aug 25, 2020
a25c9fc
Selected typo fix (#6687)
arnavsharma93 Aug 25, 2020
d17cce2
add missing keys (#6719)
patrickvonplaten Aug 25, 2020
0743403
Create README.md (#6721)
zyuanlim Aug 25, 2020
ac9702c
Fix ONNX test_quantize unittest (#6716)
mfuntowicz Aug 25, 2020
7e6397a
[squad] make examples and dataset accessible from SquadDataset object…
Aug 25, 2020
e11d923
Fix pegasus-xsum integration test (#6726)
sshleifer Aug 25, 2020
6244957
T5Tokenizer adds EOS token if not already added (#5866)
sshleifer Aug 25, 2020
64c7c2b
Install nlp for github actions test (#6728)
sgugger Aug 25, 2020
fa8ee8e
fix torchscript docs (#6740)
patrickvonplaten Aug 26, 2020
925f34b
Add "tie_word_embeddings" config param (#6692)
patrickvonplaten Aug 26, 2020
461ae86
Fix tf boolean mask in graph mode (#6741)
JayYip Aug 26, 2020
77abd1e
Centralize logging (#6434)
LysandreJik Aug 26, 2020
02e8cd5
Fix optimizer (#6717)
jplu Aug 26, 2020
e78c110
isort 5
LysandreJik Aug 26, 2020
a75c64d
Black 20 release
LysandreJik Aug 26, 2020
858b7d5
[TF Longformer] Improve Speed for TF Longformer (#6447)
patrickvonplaten Aug 26, 2020
99407f9
add xlm-roberta-large-xnli model card (#6723)
joeddav Aug 26, 2020
3242e4d
[model_cards] Fix tiny typos
julien-c Aug 26, 2020
baeba53
Adding model cards for 5 models (#6703)
AMontgomerie Aug 26, 2020
e10fb9c
Create model card for lordtt13/COVID-SciBERT (#6718)
lordtt13 Aug 26, 2020
cc4ba79
added model card for codeswitch-spaeng-sentiment-analysis-lince (#6727)
sagorbrur Aug 26, 2020
70c96a1
Model card for kuisailab/albert-base-arabic (#6729)
alisafaya Aug 26, 2020
8e0d51e
Model card for kuisailab/albert-xlarge-arabic (#6731)
alisafaya Aug 26, 2020
61b9ed8
Model card for kuisailab/albert-large-arabic (#6730)
alisafaya Aug 26, 2020
10a3450
add __init__.py to utils (#6754)
joeddav Aug 26, 2020
434936f
Model Card for Multilingual Passage Reranking BERT (#6755)
iglimanaj Aug 26, 2020
61518e2
[s2s] run_eval.py QOL improvements and cleanup(#6746)
sshleifer Aug 26, 2020
05e7150
create ProtBert-BFD model card. (#6724)
agemagician Aug 27, 2020
4bd7be9
s2s distillation uses AutoModelForSeqToSeqLM (#6761)
sshleifer Aug 27, 2020
971d180
Add AdaFactor optimizer from fairseq (#6722)
moscow25 Aug 27, 2020
41aa2b4
Adafactor docs (#6765)
LysandreJik Aug 27, 2020
6f289dc
Fix the TF Trainer gradient accumulation and the TF NER example (#6713)
jplu Aug 27, 2020
0d2c111
Format
LysandreJik Aug 27, 2020
c225e87
Fix it to work with BART (#6756)
tomgrek Aug 27, 2020
9d1b4db
add nlp install (#6767)
sshleifer Aug 27, 2020
e6b811f
[testing] replace hardcoded paths to allow running tests from anywher…
stas00 Aug 27, 2020
dbfe34f
[test schedulers] adjust to test the first step's reading (#6429)
stas00 Aug 27, 2020
70fccc5
new Makefile target: docs (#6510)
stas00 Aug 27, 2020
42fddac
Format
LysandreJik Aug 27, 2020
fd8b1b3
try running existing circleci
zcain117 Aug 27, 2020
43 changes: 17 additions & 26 deletions .circleci/config.yml
@@ -42,10 +42,9 @@ references:
# non-zero status code.
while [ $i -lt $max_checks ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
echo "Done waiting. Job status code: $status_code" && \
# Allow time for logs to flush.
sleep 60 && \
echo "JOB_NAME: $job_name" && \
gcloud logging read "resource.type=k8s_container resource.labels.project_id=$GOOGLE_PROJECT_ID resource.labels.location=$GOOGLE_COMPUTE_ZONE resource.labels.cluster_name=$GKE_CLUSTER resource.labels.namespace_name=default resource.labels.pod_name:$job_name" --limit 10000000 --order asc --format 'value(textPayload)' --project=$GOOGLE_PROJECT_ID && \
pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
echo "GKE pod name: $pod_name" && \
kubectl logs -f $pod_name --container=train
echo "Done with log retrieval attempt." && \
gcloud container images delete "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" --force-delete-tags && \
exit $status_code
@@ -78,6 +77,7 @@ jobs:
- v0.3-torch_and_tf-{{ checksum "setup.py" }}
- v0.3-{{ checksum "setup.py" }}
- run: pip install --upgrade pip
- run: pip install git+https://github.com/huggingface/nlp
- run: pip install .[sklearn,tf-cpu,torch,testing]
- run: pip install codecov pytest-cov
- save_cache:
@@ -104,6 +104,7 @@ jobs:
- v0.3-torch-{{ checksum "setup.py" }}
- v0.3-{{ checksum "setup.py" }}
- run: pip install --upgrade pip
- run: pip install git+https://github.com/huggingface/nlp
- run: pip install .[sklearn,torch,testing]
- save_cache:
key: v0.3-torch-{{ checksum "setup.py" }}
@@ -128,6 +129,7 @@ jobs:
- v0.3-tf-{{ checksum "setup.py" }}
- v0.3-{{ checksum "setup.py" }}
- run: pip install --upgrade pip
- run: pip install git+https://github.com/huggingface/nlp
- run: pip install .[sklearn,tf-cpu,testing]
- save_cache:
key: v0.3-tf-{{ checksum "setup.py" }}
@@ -151,6 +153,7 @@ jobs:
- v0.3-{{ checksum "setup.py" }}
- run: pip install --upgrade pip
- run: pip install .[ja,testing]
- run: python -m unidic download
- save_cache:
key: v0.3-custom_tokenizers-{{ checksum "setup.py" }}
paths:
@@ -235,8 +238,7 @@ jobs:
- v0.3-code_quality-{{ checksum "setup.py" }}
- v0.3-{{ checksum "setup.py" }}
- run: pip install --upgrade pip
# we need a version of isort with https://github.com/timothycrosley/isort/pull/1000
- run: pip install git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort
- run: pip install isort
- run: pip install .[tf,torch,quality]
- save_cache:
key: v0.3-code_quality-{{ checksum "setup.py" }}
@@ -294,24 +296,13 @@ workflows:
version: 2
build_and_test:
jobs:
- check_code_quality
- check_repository_consistency
- run_examples_torch
- run_tests_custom_tokenizers
- run_tests_torch_and_tf
- run_tests_torch
- run_tests_tf
- build_doc
- deploy_doc: *workflow_filters
tpu_testing_jobs:
triggers:
- schedule:
# Set to run at the first minute of every hour.
cron: "0 8 * * *"
filters:
branches:
only:
- master
jobs:
- cleanup-gke-jobs
#- check_code_quality
#- check_repository_consistency
#- run_examples_torch
#- run_tests_custom_tokenizers
#- run_tests_torch_and_tf
#- run_tests_torch
#- run_tests_tf
- run_examples_tpu
#- build_doc
#- deploy_doc: *workflow_filters
6 changes: 4 additions & 2 deletions .github/ISSUE_TEMPLATE/bug-report.md
@@ -44,8 +44,10 @@ assignees: ''
Longformer/Reformer: @patrickvonplaten
TransfoXL/XLNet: @TevenLeScao
examples/seq2seq: @sshleifer
tensorflow: @jplu
documentation: @sgugger
examples/bert-loses-patience: @JetRunner
tensorflow: @jplu
examples/token-classification: @stefan-it
documentation: @sgugger
-->

## Information
2 changes: 2 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,2 @@
<!-- This line specifies which issue to close after the pull request is merged. -->
Fixes #{issue number}
3 changes: 2 additions & 1 deletion .github/workflows/self-push.yml
@@ -45,7 +45,8 @@ jobs:
source .env/bin/activate
pip install --upgrade pip
pip install torch!=1.6.0
pip install .[sklearn,testing]
pip install .[sklearn,testing,onnxruntime]
pip install git+https://github.com/huggingface/nlp

- name: Are GPUs recognized by our DL frameworks
run: |
4 changes: 3 additions & 1 deletion .github/workflows/self-scheduled.yml
@@ -42,7 +42,8 @@ jobs:
source .env/bin/activate
pip install --upgrade pip
pip install torch!=1.6.0
pip install .[sklearn,testing]
pip install .[sklearn,testing,onnxruntime]
pip install git+https://github.com/huggingface/nlp

- name: Are GPUs recognized by our DL frameworks
run: |
@@ -58,6 +59,7 @@ jobs:
run: |
source .env/bin/activate
python -m pytest -n 1 --dist=loadfile -s ./tests/

- name: Run examples tests on GPU
env:
TF_FORCE_GPU_ALLOW_GROWTH: "true"
46 changes: 29 additions & 17 deletions CONTRIBUTING.md
@@ -65,7 +65,7 @@ Awesome! Please provide the following information:
If you are willing to contribute the model yourself, let us know so we can best
guide you.

We have added a **detailed guide and templates** to guide you in the process of adding a new model. You can find them
We have added a **detailed guide and templates** to guide you in the process of adding a new model. You can find them
in the [`templates`](https://github.com/huggingface/transformers/tree/master/templates) folder.

### Do you want a new feature (that is not a model)?
@@ -87,8 +87,8 @@ A world-class feature request addresses the following points:
If your issue is well written we're already 80% of the way there by the time you
post it.

We have added **templates** to guide you in the process of adding a new example script for training or testing the
models in the library. You can find them in the [`templates`](https://github.com/huggingface/transformers/tree/master/templates)
We have added **templates** to guide you in the process of adding a new example script for training or testing the
models in the library. You can find them in the [`templates`](https://github.com/huggingface/transformers/tree/master/templates)
folder.

## Start contributing! (Pull Requests)
@@ -134,12 +134,6 @@ Follow these steps to start contributing:
it with `pip uninstall transformers` before reinstalling it in editable
mode with the `-e` flag.)

Right now, we need an unreleased version of `isort` to avoid a
[bug](https://github.com/timothycrosley/isort/pull/1000):

```bash
$ pip install -U git+git://github.com/timothycrosley/isort.git@e63ae06ec7d70b06df9e528357650281a3d3ec22#egg=isort
```
5. Develop the features on your branch.

As you work on the features, you should make sure that the test suite
@@ -149,6 +143,14 @@ Follow these steps to start contributing:
$ make test
```

Note that this command uses the `-n auto` pytest flag, so it will start as many parallel `pytest` processes as your computer has CPU cores; if you have many of those, a few GPUs, and not a great amount of RAM, it is likely to overload your machine. To run the test suite, you may therefore want to use this command instead:

```bash
$ python -m pytest -n 3 --dist=loadfile -s -v ./tests/
```

Adjust the value of `-n` to fit the load your hardware can support.

`transformers` relies on `black` and `isort` to format its source code
consistently. After you make changes, format them with:

@@ -163,6 +165,16 @@ Follow these steps to start contributing:
$ make quality
```

If you're modifying documents under `docs/source`, make sure to validate that
they can still be built. This check also runs in CI. To run a local check
make sure you have installed the documentation builder requirements, by
running `pip install .[tf,torch,docs]` once from the root of this repository
and then run:

```bash
$ make docs
```

Once you're happy with your changes, add changed files using `git add` and
make a commit with `git commit` to record your changes locally:

@@ -208,21 +220,21 @@ Follow these steps to start contributing:
are useful to avoid duplicated work, and to differentiate it from PRs ready
to be merged;
4. Make sure existing tests pass;
5. Add high-coverage tests. No quality testing = no merge.
- If you are adding a new model, make sure that you use
5. Add high-coverage tests. No quality testing = no merge.
- If you are adding a new model, make sure that you use
`ModelTester.all_model_classes = (MyModel, MyModelWithLMHead,...)`, which triggers the common tests.
- If you are adding new `@slow` tests, make sure they pass using
`RUN_SLOW=1 python -m pytest tests/test_my_new_model.py`.
- If you are adding a new tokenizer, write tests, and make sure
- If you are adding new `@slow` tests, make sure they pass using
`RUN_SLOW=1 python -m pytest tests/test_my_new_model.py`.
- If you are adding a new tokenizer, write tests, and make sure
`RUN_SLOW=1 python -m pytest tests/test_tokenization_{your_model_name}.py` passes.
CircleCI does not run the slow tests, but github actions does every night!
6. All public methods must have informative docstrings that work nicely with sphinx. See `modeling_ctrl.py` for an
6. All public methods must have informative docstrings that work nicely with sphinx. See `modeling_ctrl.py` for an
example.

### Tests

An extensive test suite is included to test the library behavior and several examples. Library tests can be found in
the [tests folder](https://github.com/huggingface/transformers/tree/master/tests) and examples tests in the
An extensive test suite is included to test the library behavior and several examples. Library tests can be found in
the [tests folder](https://github.com/huggingface/transformers/tree/master/tests) and examples tests in the
[examples folder](https://github.com/huggingface/transformers/tree/master/examples).

We like `pytest` and `pytest-xdist` because it's faster. From the root of the
11 changes: 8 additions & 3 deletions Makefile
@@ -1,18 +1,18 @@
.PHONY: quality style test test-examples
.PHONY: quality style test test-examples docs

# Check that source code meets quality standards

quality:
black --check --line-length 119 --target-version py35 examples templates tests src utils
isort --check-only --recursive examples templates tests src utils
isort --check-only examples templates tests src utils
flake8 examples templates tests src utils
python utils/check_repo.py

# Format source code automatically

style:
black --line-length 119 --target-version py35 examples templates tests src utils
isort --recursive examples templates tests src utils
isort examples templates tests src utils

# Run tests for the library

@@ -23,3 +23,8 @@ test:

test-examples:
python -m pytest -n auto --dist=loadfile -s -v ./examples/

# Check that docs can build

docs:
cd docs && make html SPHINXOPTS="-W"
15 changes: 10 additions & 5 deletions README.md
@@ -167,8 +167,13 @@ At some point in the future, you'll be able to seamlessly move from pre-training
19. **[Reformer](https://huggingface.co/transformers/model_doc/reformer.html)** (from Google Research) released with the paper [Reformer: The Efficient Transformer](https://arxiv.org/abs/2001.04451) by Nikita Kitaev, Łukasz Kaiser, Anselm Levskaya.
20. **[MarianMT](https://huggingface.co/transformers/model_doc/marian.html)** Machine translation models trained using [OPUS](http://opus.nlpl.eu/) data by Jörg Tiedemann. The [Marian Framework](https://marian-nmt.github.io/) is being developed by the Microsoft Translator Team.
21. **[Longformer](https://huggingface.co/transformers/model_doc/longformer.html)** (from AllenAI) released with the paper [Longformer: The Long-Document Transformer](https://arxiv.org/abs/2004.05150) by Iz Beltagy, Matthew E. Peters, Arman Cohan.
22. **[Other community models](https://huggingface.co/models)**, contributed by the [community](https://huggingface.co/users).
23. Want to contribute a new model? We have added a **detailed guide and templates** to guide you in the process of adding a new model. You can find them in the [`templates`](./templates) folder of the repository. Be sure to check the [contributing guidelines](./CONTRIBUTING.md) and contact the maintainers or open an issue to collect feedbacks before starting your PR.
22. **[DPR](https://github.com/facebookresearch/DPR)** (from Facebook) released with the paper [Dense Passage Retrieval
for Open-Domain Question Answering](https://arxiv.org/abs/2004.04906) by Vladimir Karpukhin, Barlas Oğuz, Sewon
Min, Patrick Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-tau Yih.
23. **[Pegasus](https://github.com/google-research/pegasus)** (from Google) released with the paper [PEGASUS: Pre-training with Extracted Gap-sentences for Abstractive Summarization](https://arxiv.org/abs/1912.08777)> by Jingqing Zhang, Yao Zhao, Mohammad Saleh and Peter J. Liu.
24. **[MBart](https://github.com/pytorch/fairseq/tree/master/examples/mbart)** (from Facebook) released with the paper [Multilingual Denoising Pre-training for Neural Machine Translation](https://arxiv.org/abs/2001.08210) by Yinhan Liu, Jiatao Gu, Naman Goyal, Xian Li, Sergey Edunov, Marjan Ghazvininejad, Mike Lewis, Luke Zettlemoyer.
25. **[Other community models](https://huggingface.co/models)**, contributed by the [community](https://huggingface.co/users).
26. Want to contribute a new model? We have added a **detailed guide and templates** to guide you in the process of adding a new model. You can find them in the [`templates`](./templates) folder of the repository. Be sure to check the [contributing guidelines](./CONTRIBUTING.md) and contact the maintainers or open an issue to collect feedbacks before starting your PR.

These implementations have been tested on several datasets (see the example scripts) and should match the performances of the original implementations (e.g. ~93 F1 on SQuAD for BERT Whole-Word-Masking, ~88 F1 on RocStories for OpenAI GPT, ~18.3 perplexity on WikiText 103 for Transformer-XL, ~0.916 Pearson R coefficient on STS-B for XLNet). You can find more details on the performances in the Examples section of the [documentation](https://huggingface.co/transformers/examples.html).

@@ -620,7 +625,7 @@ Breaking change in the `from_pretrained()` method:

1. Models are now set in evaluation mode by default when instantiated with the `from_pretrained()` method. To train them, don't forget to set them back in training mode (`model.train()`) to activate the dropout modules.

2. The additional `*input` and `**kwargs` arguments supplied to the `from_pretrained()` method used to be directly passed to the underlying model's class `__init__()` method. They are now used to update the model configuration attribute instead, which can break derived model classes built based on the previous `BertForSequenceClassification` examples. We are working on a way to mitigate this breaking change in [#866](https://github.com/huggingface/transformers/pull/866) by forwarding the the model's `__init__()` method (i) the provided positional arguments and (ii) the keyword arguments which do not match any configuration class attributes.
2. The additional `*input` and `**kwargs` arguments supplied to the `from_pretrained()` method used to be directly passed to the underlying model's class `__init__()` method. They are now used to update the model configuration attribute instead, which can break derived model classes built based on the previous `BertForSequenceClassification` examples. We are working on a way to mitigate this breaking change in [#866](https://github.com/huggingface/transformers/pull/866) by forwarding the model's `__init__()` method (i) the provided positional arguments and (ii) the keyword arguments which do not match any configuration class attributes.

Also, while not a breaking change, the serialization methods have been standardized and you probably should switch to the new method `save_pretrained(save_directory)` if you were using any other serialization method before.

@@ -693,11 +698,11 @@ for batch in train_data:

## Citation

We now have a paper you can cite for the 🤗 Transformers library:
We now have a [paper](https://arxiv.org/abs/1910.03771) you can cite for the 🤗 Transformers library:
```bibtex
@article{Wolf2019HuggingFacesTS,
title={HuggingFace's Transformers: State-of-the-art Natural Language Processing},
author={Thomas Wolf and Lysandre Debut and Victor Sanh and Julien Chaumond and Clement Delangue and Anthony Moi and Pierric Cistac and Tim Rault and R'emi Louf and Morgan Funtowicz and Jamie Brew},
author={Thomas Wolf and Lysandre Debut and Victor Sanh and Julien Chaumond and Clement Delangue and Anthony Moi and Pierric Cistac and Tim Rault and Rémi Louf and Morgan Funtowicz and Joe Davison and Sam Shleifer and Patrick von Platen and Clara Ma and Yacine Jernite and Julien Plu and Canwen Xu and Teven Le Scao and Sylvain Gugger and Mariama Drame and Quentin Lhoest and Alexander M. Rush},
journal={ArXiv},
year={2019},
volume={abs/1910.03771}
4 changes: 4 additions & 0 deletions codecov.yml
@@ -4,3 +4,7 @@ coverage:
default:
informational: true
patch: off
comment:
require_changes: true # only comment if there was change in coverage
require_head: yes # don't report if there is no head coverage report
require_base: yes # don't report if there is no base coverage report
1 change: 1 addition & 0 deletions docs/source/_static/js/custom.js
@@ -24,6 +24,7 @@ const versionMapping = {
// The page that have a notebook and therefore should have the open in colab badge.
const hasNotebook = [
"benchmarks",
"custom_datasets",
"multilingual",
"perplexity",
"preprocessing",
3 changes: 2 additions & 1 deletion docs/source/conf.py
@@ -76,7 +76,8 @@
pygments_style = None

# Remove the prompt when copying examples
copybutton_prompt_text = ">>> "
copybutton_prompt_text = r">>> |\.\.\. "
copybutton_prompt_is_regexp = True

# -- Options for HTML output -------------------------------------------------
