Merge remote-tracking branch 'huggingface/master'
# Conflicts:
#	pytorch_transformers/__init__.py
erenup committed Sep 16, 2019
2 parents 6e1ac34 + 32e1332 commit 84b9d1c
Showing 88 changed files with 4,516 additions and 3,908 deletions.
35 changes: 27 additions & 8 deletions .circleci/config.yml
@@ -4,8 +4,8 @@ jobs:
     working_directory: ~/pytorch-transformers
     docker:
       - image: circleci/python:3.5
-    resource_class: large
-    parallelism: 4
+    resource_class: xlarge
+    parallelism: 1
     steps:
       - checkout
       - run: sudo pip install --progress-bar off .
@@ -17,7 +17,7 @@ jobs:
   build_py2:
     working_directory: ~/pytorch-transformers
     resource_class: large
-    parallelism: 4
+    parallelism: 1
     docker:
       - image: circleci/python:2.7
     steps:
@@ -26,9 +26,28 @@
       - run: sudo pip install pytest codecov pytest-cov
       - run: python -m pytest -sv ./pytorch_transformers/tests/ --cov
       - run: codecov
+  deploy_doc:
+    working_directory: ~/pytorch-transformers
+    docker:
+      - image: circleci/python:3.5
+    steps:
+      - add_ssh_keys:
+          fingerprints:
+            - "5b:7a:95:18:07:8c:aa:76:4c:60:35:88:ad:60:56:71"
+      - checkout
+      - run: sudo pip install --progress-bar off -r docs/requirements.txt
+      - run: sudo pip install --progress-bar off -r requirements.txt
+      - run: cd docs/source && ln -s ../../examples/README.md examples.md && cd -
+      - run: cd docs && make clean && make html && scp -r -oStrictHostKeyChecking=no _build/html/* $doc:$dir
+workflow_filters: &workflow_filters
+  filters:
+    branches:
+      only:
+        - master
 workflows:
-  version: 2
-  build_and_test:
-    jobs:
-      - build_py3
-      - build_py2
+  version: 2
+  build_and_test:
+    jobs:
+      - build_py3
+      - build_py2
+      - deploy_doc: *workflow_filters
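The `workflow_filters:` entry added above defines a YAML anchor (`&workflow_filters`) that the new `- deploy_doc: *workflow_filters` workflow entry reuses, so the documentation deploy job only runs on `master`. A quick way to see how the alias expands on load — a minimal sketch, assuming PyYAML is available (it is not part of this change):

```python
import yaml  # PyYAML, used here only to illustrate how the anchor/alias expands

snippet = """
workflow_filters: &workflow_filters
  filters:
    branches:
      only:
        - master
workflows:
  version: 2
  build_and_test:
    jobs:
      - build_py3
      - build_py2
      - deploy_doc: *workflow_filters
"""

config = yaml.safe_load(snippet)
# The alias is expanded at load time, so deploy_doc carries the branch filter:
print(config["workflows"]["build_and_test"]["jobs"][2])
# {'deploy_doc': {'filters': {'branches': {'only': ['master']}}}}
```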
3 changes: 2 additions & 1 deletion .gitignore
@@ -130,4 +130,5 @@ runs
 examples/runs

 # data
-data
+data
+serialization_dir
11 changes: 10 additions & 1 deletion README.md
@@ -21,6 +21,7 @@ These implementations have been tested on several datasets (see the example scri
 | Section | Description |
 |-|-|
 | [Installation](#installation) | How to install the package |
+| [Online demo](#online-demo) | Experimenting with this repo’s text generation capabilities |
 | [Quick tour: Usage](#quick-tour) | Tokenizers & models usage: Bert and GPT-2 |
 | [Quick tour: Fine-tuning/usage scripts](#quick-tour-of-the-fine-tuningusage-scripts) | Using provided scripts: GLUE, SQuAD and Text generation |
 | [Migrating from pytorch-pretrained-bert to pytorch-transformers](#Migrating-from-pytorch-pretrained-bert-to-pytorch-transformers) | Migrating your code from pytorch-pretrained-bert to pytorch-transformers |
@@ -68,6 +69,14 @@ It contains an example of a conversion script from a Pytorch trained Transformer
 At some point in the future, you'll be able to seamlessly move from pre-training or fine-tuning models in PyTorch to productizing them in CoreML,
 or prototype a model or an app in CoreML then research its hyperparameters or architecture from PyTorch. Super exciting!

+## Online demo
+
+**[Write With Transformer](https://transformer.huggingface.co)**, built by the Hugging Face team at transformer.huggingface.co, is the official demo of this repo’s text generation capabilities.
+You can use it to experiment with completions generated by `GPT2Model`, `TransfoXLModel`, and `XLNetModel`.
+
+> “🦄 Write with transformer is to writing what calculators are to calculus.”
+![write_with_transformer](https://transformer.huggingface.co/front/assets/thumbnail-large.png)
+
 ## Quick tour

@@ -95,7 +104,7 @@ for model_class, tokenizer_class, pretrained_weights in MODELS:
     model = model_class.from_pretrained(pretrained_weights)

     # Encode text
-    input_ids = torch.tensor([tokenizer.encode("Here is some text to encode")])
+    input_ids = torch.tensor([tokenizer.encode("Here is some text to encode", add_special_tokens=True)])  # Add special tokens takes care of adding [CLS], [SEP], <s>... tokens in the right way for each model.
     with torch.no_grad():
         last_hidden_states = model(input_ids)[0]  # Models outputs are now tuples
7 changes: 7 additions & 0 deletions docs/README.md
@@ -34,6 +34,13 @@ pip install recommonmark

 ## Building the documentation

+Make sure that there is a symlink to the examples `README.md` (in /examples) inside the source folder. Run the following
+command to generate it:
+
+```bash
+ln -s ../../examples/README.md source/examples.md
+```
+
 Once you have set up `sphinx`, you can build the documentation by running the following command in the `/docs` folder:
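The Makefile in `/docs` drives the actual build; if you want to trigger the same HTML build from Python instead (for instance in a quick local check), something along these lines should work — a sketch that assumes a Sphinx version where `sphinx.cmd.build.build_main`, the entry point behind the `sphinx-build` CLI, is available:

```python
from sphinx.cmd.build import build_main  # same entry point the sphinx-build CLI wraps

# Equivalent in spirit to `cd docs && make html`: build the HTML docs from
# docs/source into docs/_build/html (paths relative to the repository root).
exit_code = build_main(["-b", "html", "docs/source", "docs/_build/html"])
raise SystemExit(exit_code)
```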
1 change: 1 addition & 0 deletions docs/requirements.txt
@@ -26,3 +26,4 @@ sphinxcontrib-jsmath==1.0.1
 sphinxcontrib-qthelp==1.0.2
 sphinxcontrib-serializinghtml==1.1.3
 urllib3==1.25.3
+sphinx-markdown-tables==0.0.9
5 changes: 3 additions & 2 deletions docs/source/conf.py
@@ -26,7 +26,7 @@
 # The short X.Y version
 version = u''
 # The full version, including alpha/beta/rc tags
-release = u'1.0.0'
+release = u'1.2.0'


 # -- General configuration ---------------------------------------------------
@@ -43,7 +43,8 @@
     'sphinx.ext.coverage',
     'sphinx.ext.napoleon',
     'recommonmark',
-    'sphinx.ext.viewcode'
+    'sphinx.ext.viewcode',
+    'sphinx_markdown_tables'
 ]

 # Add any paths that contain templates here, relative to this directory.