
Commit

Merge branch 'master' into migration_tutorial
cpuhrsch authored Feb 24, 2021
2 parents 4c0a75a + db8da95 commit 906e9cc
Showing 30 changed files with 1,553 additions and 866 deletions.
22 changes: 22 additions & 0 deletions .circleci/config.yml
@@ -541,6 +541,23 @@ jobs:
target=${tag:-master}
~/workspace/.circleci/build_docs/commit_docs.sh ~/workspace $target
docstring_parameters_sync:
<<: *binary_common
docker:
- image: continuumio/miniconda3
resource_class: medium
steps:
- attach_workspace:
at: ~/workspace
- designate_upload_channel
- checkout
- run:
name: Check parameters docstring sync
command: |
pip install --user pydocstyle
export PATH="$HOME/.local/bin:$PATH"
pydocstyle torchtext
workflows:
build:
@@ -634,6 +651,11 @@ workflows:
python_version: '3.8'
requires:
- build_docs
- docstring_parameters_sync:
name: docstring_parameters_sync
python_version: '3.8'
requires:
- binary_linux_wheel_py3.8
unittest:
jobs:
- unittest_linux:
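The new docstring_parameters_sync job installs pydocstyle and runs it over the torchtext package. A rough local equivalent of that step, sketched in Python rather than shell (this assumes pydocstyle can be invoked via python -m, which recent releases support; it is not part of this diff):

    # Sketch only: mirrors the CI step "pip install --user pydocstyle && pydocstyle torchtext"
    # so the docstring check can be reproduced before pushing.
    import subprocess
    import sys

    # Install pydocstyle, as the CI job does.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "--user", "pydocstyle"],
        check=True,
    )

    # Run the same check over the torchtext package; a non-zero exit code
    # means at least one docstring violates the rules selected in setup.cfg.
    result = subprocess.run([sys.executable, "-m", "pydocstyle", "torchtext"])
    sys.exit(result.returncode)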
17 changes: 17 additions & 0 deletions .circleci/config.yml.in
@@ -541,6 +541,23 @@ jobs:
target=${tag:-master}
~/workspace/.circleci/build_docs/commit_docs.sh ~/workspace $target

docstring_parameters_sync:
<<: *binary_common
docker:
- image: continuumio/miniconda3
resource_class: medium
steps:
- attach_workspace:
at: ~/workspace
- designate_upload_channel
- checkout
- run:
name: Check parameters docstring sync
command: |
pip install --user pydocstyle
export PATH="$HOME/.local/bin:$PATH"
pydocstyle torchtext


workflows:
build:
13 changes: 13 additions & 0 deletions .circleci/regenerate.py
@@ -35,6 +35,7 @@ def build_workflows(prefix='', upload=False, filter_branch=None, indentation=6):
# Build on every pull request, but upload only on nightly and tags
w += build_doc_job(None)
w += upload_doc_job('nightly')
w += docstring_parameters_sync_job(None)

return indent(indentation, w)

@@ -77,6 +78,18 @@ def upload_doc_job(filter_branch):
return [{"upload_docs": job}]


def docstring_parameters_sync_job(filter_branch):
job = {
"name": "docstring_parameters_sync",
"python_version": "3.8",
"requires": ["binary_linux_wheel_py3.8", ],
}

if filter_branch:
job["filters"] = gen_filter_branch_tree(filter_branch)
return [{"docstring_parameters_sync": job}]


def generate_base_workflow(base_workflow_name, python_version, filter_branch, os_type, btype):
d = {
"name": base_workflow_name,
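For context, a minimal standalone sketch of the workflow entry that the new helper contributes to the generated config. The yaml.dump preview is not part of regenerate.py, and the filters literal shown for a branch filter is an assumption (the real script delegates to gen_filter_branch_tree):

    import yaml  # assumption: PyYAML is available, as regenerate.py emits YAML

    def docstring_parameters_sync_job(filter_branch):
        # Same dict as the new helper in .circleci/regenerate.py.
        job = {
            "name": "docstring_parameters_sync",
            "python_version": "3.8",
            "requires": ["binary_linux_wheel_py3.8"],
        }
        if filter_branch:
            # regenerate.py calls gen_filter_branch_tree() here; this literal is an assumption.
            job["filters"] = {"branches": {"only": [filter_branch]}}
        return [{"docstring_parameters_sync": job}]

    # Preview the entry that build_workflows() appends for every pull request.
    print(yaml.dump(docstring_parameters_sync_job(None), default_flow_style=False))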
18 changes: 6 additions & 12 deletions docs/source/datasets.rst
@@ -98,25 +98,19 @@ PennTreebank
.. autofunction:: PennTreebank


WMTNewsCrawl
~~~~~~~~~~~~

.. autofunction:: WMTNewsCrawl


Machine Translation
^^^^^^^^^^^^^^^^^^^

Multi30k
~~~~~~~~

.. autofunction:: Multi30k
IWSLT2016
~~~~~~~~~

.. autofunction:: IWSLT2016

IWSLT
~~~~~
IWSLT2017
~~~~~~~~~

.. autofunction:: IWSLT
.. autofunction:: IWSLT2017


WMT14
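This change replaces the single IWSLT entry with separate IWSLT2016 and IWSLT2017 functions. A hypothetical usage sketch, mirroring the split-style pattern used by the other translation datasets in this diff (the module path and keyword arguments are assumptions, not taken from this commit):

    # Hypothetical example; argument names are assumptions.
    from torchtext.datasets import IWSLT2016, IWSLT2017

    train_iter = IWSLT2016(split='train')
    for src_sentence, tgt_sentence in train_iter:
        print(src_sentence, tgt_sentence)
        break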
91 changes: 0 additions & 91 deletions docs/source/examples.rst

This file was deleted.

10 changes: 7 additions & 3 deletions docs/source/experimental_datasets.rst
@@ -132,11 +132,15 @@ Multi30k

.. autofunction:: Multi30k

IWSLT2016
~~~~~~~~~

IWSLT
~~~~~
.. autofunction:: IWSLT2016

IWSLT2017
~~~~~~~~~

.. autofunction:: IWSLT
.. autofunction:: IWSLT2017


WMT14
38 changes: 38 additions & 0 deletions docs/source/experimental_datasets_raw.rst
@@ -0,0 +1,38 @@
torchtext.experimental.datasets.raw
===================================

.. currentmodule:: torchtext.experimental.datasets.raw

General use cases are as follows: ::


# import datasets
from torchtext.experimental.datasets.raw import Multi30k

train_iter = Multi30k(split='train')

def tokenize(line):
    return line.split()
tokens_src = []
tokens_tgt = []
for line in train_iter:
    src, tgt = line
    tokens_src += tokenize(src)
    tokens_tgt += tokenize(tgt)

The following datasets are available:

.. contents:: Datasets
:local:


Machine Translation
^^^^^^^^^^^^^^^^^^^

Multi30k
~~~~~~~~

.. autofunction:: Multi30k
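Building on the usage example above, a hedged sketch of turning the raw iterator into a vocabulary; build_vocab_from_iterator is assumed to be available in torchtext.vocab at this version, so treat this as illustrative rather than part of the documented API:

    # Sketch: build a source-side vocabulary from the raw Multi30k iterator.
    from torchtext.experimental.datasets.raw import Multi30k
    from torchtext.vocab import build_vocab_from_iterator  # assumption: present in this release

    def tokenize(line):
        return line.split()

    train_iter = Multi30k(split='train')
    src_tokens = (tokenize(src) for src, tgt in train_iter)
    src_vocab = build_vocab_from_iterator(src_tokens)

    print(len(src_vocab))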

2 changes: 1 addition & 1 deletion docs/source/index.rst
@@ -40,11 +40,11 @@ popular datasets for natural language.
torchtext.vocab <vocab>
torchtext.utils <utils>
experimental_datasets
experimental_datasets_raw
experimental_transforms
experimental_vectors
experimental_vocab
models_utils
examples <examples>

.. automodule:: torchtext
:members:
2 changes: 2 additions & 0 deletions setup.cfg
@@ -0,0 +1,2 @@
[pydocstyle]
select = D417 # Missing argument descriptions in the docstring
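setup.cfg enables only D417, so the new CI job fails when a docstring's parameter section omits an argument. A hypothetical illustration of what the rule flags (not code from the torchtext codebase):

    # Hypothetical example, not from torchtext.

    def pad_bad(tokens, max_len):
        """Pad a token list to a fixed length.

        Args:
            tokens: list of token strings to pad.
        """
        # D417: ``max_len`` is missing from the Args section above.
        return tokens + ['<pad>'] * (max_len - len(tokens))

    def pad_good(tokens, max_len):
        """Pad a token list to a fixed length.

        Args:
            tokens: list of token strings to pad.
            max_len: target length after padding.
        """
        return tokens + ['<pad>'] * (max_len - len(tokens))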