Swapping experimental Vocab and retiring current Vocab into legacy (#1289)

Summary: allow-large-files to commit wikitext103_vocab.pt

Reviewed By: cpuhrsch

Differential Revision: D28478152

fbshipit-source-id: c2a871439f054024b95c05f7664a84028aacaca3
parmeet authored and facebook-github-bot committed May 19, 2021
1 parent 6231993 commit c8bced1
Showing 26 changed files with 842 additions and 856 deletions.
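In user code the swap is an import move: the counter-based Vocab and its helpers now have to be imported from torchtext.legacy.vocab, while the class previously under torchtext.experimental.vocab becomes torchtext.vocab.Vocab. A minimal before/after sketch of the migration that the per-file diffs below apply (module paths taken from those diffs):

    # before this commit
    from torchtext.vocab import Vocab               # counter-based Vocab
    from torchtext.experimental.vocab import Vocab  # experimental Vocab

    # after this commit
    from torchtext.legacy.vocab import Vocab        # retired counter-based Vocab
    from torchtext.vocab import Vocab               # promoted experimental Vocab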
2 changes: 1 addition & 1 deletion benchmark/benchmark_experimental_vocab.py
@@ -12,7 +12,7 @@
     load_vocab_from_file,
     build_vocab_from_text_file
 )
-from torchtext.vocab import (
+from torchtext.legacy.vocab import (
     Vocab,
     build_vocab_from_iterator
 )
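With the legacy import in place, the benchmark can keep comparing the retired implementation against the experimental one. A rough, illustrative sketch of the legacy half of such a comparison, using only names imported in the hunk above (the timing body is not the script's actual code):

    import time
    from torchtext.legacy.vocab import build_vocab_from_iterator

    tokens = ["hello", "world", "hello"] * 1000

    start = time.monotonic()
    legacy_vocab = build_vocab_from_iterator([tokens])  # legacy builder consumes an iterator of token lists
    print("legacy build time:", time.monotonic() - start)

    start = time.monotonic()
    ids = [legacy_vocab.stoi.get(t, 0) for t in tokens]  # string-to-index lookups via the stoi mapping
    print("legacy lookup time:", time.monotonic() - start)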
7 changes: 0 additions & 7 deletions docs/source/experimental_vocab.rst
@@ -7,13 +7,6 @@ torchtext.experimental.vocab
 .. automodule:: torchtext.experimental.vocab
 .. currentmodule:: torchtext.experimental.vocab
 
-:hidden:`Vocab`
-~~~~~~~~~~~~~~~
-
-.. autoclass:: Vocab
-    :members:
-    :special-members:
-
 :hidden:`vocab`
 ~~~~~~~~~~~~~~~
 
17 changes: 1 addition & 16 deletions docs/source/vocab.rst
@@ -12,14 +12,8 @@ torchtext.vocab
 
 .. autoclass:: Vocab
     :members:
-    :special-members: __init__
-
-:hidden:`SubwordVocab`
-~~~~~~~~~~~~~~~~~~~~~~
+    :special-members:
 
-.. autoclass:: SubwordVocab
-    :members:
-    :special-members: __init__
 
 :hidden:`Vectors`
 ~~~~~~~~~~~~~~~~~
@@ -48,12 +42,3 @@ Pretrained Word Embeddings
 
 .. autoclass:: CharNGram
     :members:
-
-Misc.
------
-
-:hidden:`build_vocab_from_iterator`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-.. autofunction:: build_vocab_from_iterator
-
2 changes: 1 addition & 1 deletion examples/BERT/mlm_task.py
@@ -125,7 +125,7 @@ def run_main(args, rank=None):
     except:
         train_dataset, valid_dataset, test_dataset = WLMDataset()
         old_vocab = train_dataset.vocab
-        vocab = torchtext.vocab.Vocab(counter=old_vocab.freqs,
+        vocab = torchtext.legacy.vocab.Vocab(counter=old_vocab.freqs,
                                       specials=['<unk>', '<pad>', '<MASK>'])
         with open(args.save_vocab, 'wb') as f:
             torch.save(vocab, f)
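The BERT examples (this file and qa_task.py below) rebuild a counter-based vocabulary from an older dataset's frequency table, so the constructor has to come from the legacy namespace now. A self-contained sketch of the same pattern, with a made-up Counter standing in for old_vocab.freqs:

    from collections import Counter
    import torch
    from torchtext.legacy.vocab import Vocab

    freqs = Counter(["the", "the", "cat", "sat"])  # stand-in for old_vocab.freqs
    vocab = Vocab(counter=freqs, specials=['<unk>', '<pad>', '<MASK>'])

    with open("vocab.pt", "wb") as f:  # saved the same way as the script does
        torch.save(vocab, f)

    print(vocab.stoi['<pad>'])  # specials occupy the lowest indices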
2 changes: 1 addition & 1 deletion examples/BERT/qa_task.py
@@ -163,7 +163,7 @@ def train():
     except:
         train_dataset, dev_dataset = SQuAD1()
         old_vocab = train_dataset.vocab
-        vocab = torchtext.vocab.Vocab(counter=old_vocab.freqs,
+        vocab = torchtext.legacy.vocab.Vocab(counter=old_vocab.freqs,
                                       specials=['<unk>', '<pad>', '<MASK>'])
         with open(args.save_vocab, 'wb') as f:
             torch.save(vocab, f)
4 changes: 2 additions & 2 deletions examples/data_pipeline/pipelines.py
@@ -45,7 +45,7 @@ def build_sp_pipeline(args):
 def build_legacy_torchtext_vocab_pipeline(args):
     vocab_file = args.vocab_filename
     tokenizer = get_tokenizer("basic_english")
-    from torchtext.vocab import build_vocab_from_iterator
+    from torchtext.legacy.vocab import build_vocab_from_iterator
 
     def token_iterator(vocab_file):
         f = open(vocab_file, 'r')
@@ -72,7 +72,7 @@ def build_experimental_torchtext_pipeline(args):
 def build_legacy_batch_torchtext_vocab_pipeline(args):
     vocab_file = args.vocab_filename
     tokenizer = get_tokenizer("basic_english")
-    from torchtext.vocab import build_vocab_from_iterator
+    from torchtext.legacy.vocab import build_vocab_from_iterator
 
     def token_iterator(vocab_file):
         f = open(vocab_file, 'r')
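In both legacy pipelines only the import moves; the surrounding pattern of streaming tokenized lines from the vocab file into build_vocab_from_iterator stays the same. A self-contained sketch of that pattern (the iterator body here is a tidied-up guess, since the hunk is truncated after the open() call):

    from torchtext.data.utils import get_tokenizer
    from torchtext.legacy.vocab import build_vocab_from_iterator

    def token_iterator(vocab_file):
        tokenizer = get_tokenizer("basic_english")
        with open(vocab_file, "r") as f:  # the context manager closes the file, unlike the bare open() above
            for line in f:
                yield tokenizer(line)

    # vocab = build_vocab_from_iterator(token_iterator(args.vocab_filename))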
3 changes: 2 additions & 1 deletion examples/vocab/pytext_vocab.py
@@ -2,7 +2,8 @@
 
 from fairseq.data.dictionary import Dictionary
 import torch
-from torchtext.experimental.vocab import vocab, Vocab
+from torchtext.experimental.vocab import vocab
+from torchtext.vocab import Vocab
 from typing import Dict, List, Optional
 
 
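This hunk shows the other half of the swap: the vocab() factory still lives in the experimental module, but the Vocab class it is paired with is now the one exposed as torchtext.vocab.Vocab. A hedged sketch of using the two together, assuming the factory accepts an ordered mapping of token frequencies and that its default handling of the unknown token is left alone:

    from collections import OrderedDict
    from torchtext.experimental.vocab import vocab
    from torchtext.vocab import Vocab

    freqs = OrderedDict([("<unk>", 1), ("hello", 4), ("world", 3)])
    v: Vocab = vocab(freqs)  # pairing the factory with the promoted Vocab class, as the example's imports suggest
    print(len(v))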
2 changes: 1 addition & 1 deletion examples/vocab/vocab.py
@@ -4,7 +4,7 @@
 import torch
 import io
 
-from torchtext.vocab import build_vocab_from_iterator
+from torchtext.legacy.vocab import build_vocab_from_iterator
 from torchtext.data.utils import ngrams_iterator
 from torchtext.data.utils import get_tokenizer
 from torchtext.utils import unicode_csv_reader
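The standalone vocab example builds an n-gram vocabulary from tokenized text, so again only the builder's import changes. A rough sketch combining the helpers imported in this hunk (the input lines are stand-ins for rows read via unicode_csv_reader):

    from torchtext.data.utils import get_tokenizer, ngrams_iterator
    from torchtext.legacy.vocab import build_vocab_from_iterator

    tokenizer = get_tokenizer("basic_english")
    lines = ["the cat sat on the mat", "the dog sat"]

    def ngram_tokens(lines, ngrams=2):
        for line in lines:
            yield list(ngrams_iterator(tokenizer(line), ngrams))  # unigrams plus joined bigrams

    vocab = build_vocab_from_iterator(ngram_tokens(lines))
    print(len(vocab))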
Binary file modified test/asset/wikitext103_vocab.pt
6 changes: 3 additions & 3 deletions test/data/test_builtin_datasets.py
@@ -210,7 +210,7 @@ def test_next_method_dataset(self):
 
     def test_imdb(self):
         from torchtext.experimental.datasets import IMDB
-        from torchtext.vocab import Vocab
+        from torchtext.legacy.vocab import Vocab
         # smoke test to ensure imdb works properly
         train_dataset, test_dataset = IMDB()
         self._helper_test_func(len(train_dataset), 25000, train_dataset[0][1][:10],
@@ -465,7 +465,7 @@ def test_conll_sequence_tagging(self):
 
     def test_squad1(self):
         from torchtext.experimental.datasets import SQuAD1
-        from torchtext.vocab import Vocab
+        from torchtext.legacy.vocab import Vocab
         # smoke test to ensure imdb works properly
         train_dataset, dev_dataset = SQuAD1()
         context, question, answers, ans_pos = train_dataset[100]
@@ -494,7 +494,7 @@ def test_squad1(self):
 
     def test_squad2(self):
         from torchtext.experimental.datasets import SQuAD2
-        from torchtext.vocab import Vocab
+        from torchtext.legacy.vocab import Vocab
         # smoke test to ensure imdb works properly
        train_dataset, dev_dataset = SQuAD2()
         context, question, answers, ans_pos = train_dataset[200]
265 changes: 0 additions & 265 deletions test/experimental/test_vocab.py

This file was deleted.

